Merge pull request #2 from wtsi-hgi/feature/arv-view
author    Joshua C. Randall <jcrandall@alum.mit.edu>
          Fri, 4 Sep 2015 09:55:37 +0000 (10:55 +0100)
committer Joshua C. Randall <jcrandall@alum.mit.edu>
          Fri, 4 Sep 2015 09:55:37 +0000 (10:55 +0100)
Feature/arv view

566 files changed:
.gitignore
apps/workbench/Gemfile
apps/workbench/Gemfile.lock
apps/workbench/app/assets/images/mouse-move.gif [new file with mode: 0644]
apps/workbench/app/assets/images/pipeline-running.gif [new file with mode: 0644]
apps/workbench/app/assets/javascripts/add_repository.js [new file with mode: 0644]
apps/workbench/app/assets/javascripts/application.js
apps/workbench/app/assets/javascripts/infinite_scroll.js
apps/workbench/app/assets/javascripts/modal_pager.js [new file with mode: 0644]
apps/workbench/app/assets/javascripts/pipeline_instances.js
apps/workbench/app/assets/javascripts/select_modal.js
apps/workbench/app/assets/javascripts/selection.js.erb
apps/workbench/app/assets/javascripts/tab_panes.js
apps/workbench/app/assets/javascripts/users.js
apps/workbench/app/assets/stylesheets/application.css.scss
apps/workbench/app/controllers/actions_controller.rb
apps/workbench/app/controllers/application_controller.rb
apps/workbench/app/controllers/collections_controller.rb
apps/workbench/app/controllers/jobs_controller.rb
apps/workbench/app/controllers/pipeline_instances_controller.rb
apps/workbench/app/controllers/projects_controller.rb
apps/workbench/app/controllers/repositories_controller.rb
apps/workbench/app/controllers/users_controller.rb
apps/workbench/app/controllers/virtual_machines_controller.rb
apps/workbench/app/helpers/application_helper.rb
apps/workbench/app/helpers/jobs_helper.rb [deleted file]
apps/workbench/app/helpers/pipeline_instances_helper.rb
apps/workbench/app/models/arvados_api_client.rb
apps/workbench/app/models/arvados_base.rb
apps/workbench/app/models/arvados_resource_list.rb
apps/workbench/app/models/authorized_key.rb
apps/workbench/app/models/pipeline_instance.rb
apps/workbench/app/models/repository.rb
apps/workbench/app/models/user.rb
apps/workbench/app/views/application/_choose.html.erb
apps/workbench/app/views/application/_content.html.erb
apps/workbench/app/views/application/_delete_object_button.html.erb
apps/workbench/app/views/application/_projects_tree_menu.html.erb
apps/workbench/app/views/application/_selection_checkbox.html.erb
apps/workbench/app/views/application/_show_home_button.html.erb [new file with mode: 0644]
apps/workbench/app/views/application/_show_recent.html.erb
apps/workbench/app/views/application/_show_sharing.html.erb
apps/workbench/app/views/application/_title_and_buttons.html.erb
apps/workbench/app/views/collections/_show_files.html.erb
apps/workbench/app/views/collections/_show_source_summary.html.erb
apps/workbench/app/views/collections/hash_matches.html.erb
apps/workbench/app/views/getting_started/_getting_started_popup.html.erb [new file with mode: 0644]
apps/workbench/app/views/layouts/application.html.erb
apps/workbench/app/views/layouts/body.html.erb
apps/workbench/app/views/notifications/_ssh_key_notification.html.erb
apps/workbench/app/views/pipeline_instances/_running_component.html.erb
apps/workbench/app/views/pipeline_instances/_show_components.html.erb
apps/workbench/app/views/pipeline_instances/_show_components_running.html.erb
apps/workbench/app/views/pipeline_instances/_show_inputs.html.erb
apps/workbench/app/views/pipeline_instances/_show_log.html.erb
apps/workbench/app/views/projects/_index_jobs_and_pipelines.html.erb
apps/workbench/app/views/projects/_show_contents_rows.html.erb
apps/workbench/app/views/projects/_show_dashboard.html.erb
apps/workbench/app/views/projects/_show_description.html.erb
apps/workbench/app/views/projects/_show_tab_contents.html.erb
apps/workbench/app/views/projects/public.html.erb [new file with mode: 0644]
apps/workbench/app/views/repositories/_repository_breadcrumbs.html.erb [new file with mode: 0644]
apps/workbench/app/views/repositories/show_blob.html.erb [new file with mode: 0644]
apps/workbench/app/views/repositories/show_commit.html.erb [new file with mode: 0644]
apps/workbench/app/views/repositories/show_tree.html.erb [new file with mode: 0644]
apps/workbench/app/views/users/_add_repository_modal.html.erb [new file with mode: 0644]
apps/workbench/app/views/users/_add_ssh_key_popup.html.erb
apps/workbench/app/views/users/_current_token.html.erb [moved from apps/workbench/app/views/users/_manage_current_token.html.erb with 52% similarity]
apps/workbench/app/views/users/_manage_account.html.erb [deleted file]
apps/workbench/app/views/users/_repositories.html.erb [moved from apps/workbench/app/views/users/_manage_repositories.html.erb with 58% similarity]
apps/workbench/app/views/users/_setup_popup.html.erb
apps/workbench/app/views/users/_show_admin.html.erb
apps/workbench/app/views/users/_ssh_keys.html.erb [moved from apps/workbench/app/views/users/_manage_ssh_keys.html.erb with 73% similarity]
apps/workbench/app/views/users/_virtual_machines.html.erb [moved from apps/workbench/app/views/users/_manage_virtual_machines.html.erb with 70% similarity]
apps/workbench/app/views/users/current_token.html.erb [new file with mode: 0644]
apps/workbench/app/views/users/manage_account.html.erb [deleted file]
apps/workbench/app/views/users/profile.html.erb
apps/workbench/app/views/users/repositories.html.erb [new file with mode: 0644]
apps/workbench/app/views/users/ssh_keys.html.erb [new file with mode: 0644]
apps/workbench/app/views/users/virtual_machines.html.erb [new file with mode: 0644]
apps/workbench/app/views/virtual_machines/webshell.html.erb [new file with mode: 0644]
apps/workbench/config/application.default.yml
apps/workbench/config/database.yml
apps/workbench/config/load_config.rb
apps/workbench/config/routes.rb
apps/workbench/public/webshell/README [new file with mode: 0644]
apps/workbench/public/webshell/enabled.gif [new file with mode: 0644]
apps/workbench/public/webshell/keyboard.html [new file with mode: 0644]
apps/workbench/public/webshell/keyboard.png [new file with mode: 0644]
apps/workbench/public/webshell/shell_in_a_box.js [new file with mode: 0644]
apps/workbench/public/webshell/styles.css [new file with mode: 0644]
apps/workbench/test/controllers/actions_controller_test.rb
apps/workbench/test/controllers/application_controller_test.rb
apps/workbench/test/controllers/collections_controller_test.rb
apps/workbench/test/controllers/jobs_controller_test.rb
apps/workbench/test/controllers/projects_controller_test.rb
apps/workbench/test/controllers/repositories_controller_test.rb
apps/workbench/test/controllers/users_controller_test.rb
apps/workbench/test/helpers/collections_helper_test.rb
apps/workbench/test/helpers/manifest_examples.rb [new symlink]
apps/workbench/test/helpers/repository_stub_helper.rb [new file with mode: 0644]
apps/workbench/test/helpers/share_object_helper.rb
apps/workbench/test/helpers/time_block.rb [new symlink]
apps/workbench/test/integration/anonymous_access_test.rb
apps/workbench/test/integration/application_layout_test.rb
apps/workbench/test/integration/collection_upload_test.rb
apps/workbench/test/integration/collections_test.rb
apps/workbench/test/integration/errors_test.rb
apps/workbench/test/integration/integration_test_utils.rb [new file with mode: 0644]
apps/workbench/test/integration/jobs_test.rb
apps/workbench/test/integration/pipeline_instances_test.rb
apps/workbench/test/integration/projects_test.rb
apps/workbench/test/integration/report_issue_test.rb
apps/workbench/test/integration/repositories_browse_test.rb [new file with mode: 0644]
apps/workbench/test/integration/search_box_test.rb
apps/workbench/test/integration/user_profile_test.rb
apps/workbench/test/integration/user_settings_menu_test.rb [moved from apps/workbench/test/integration/user_manage_account_test.rb with 59% similarity]
apps/workbench/test/integration/users_test.rb
apps/workbench/test/integration_performance/collection_unit_test.rb [new file with mode: 0644]
apps/workbench/test/integration_performance/collections_controller_test.rb [new file with mode: 0644]
apps/workbench/test/integration_performance/collections_perf_test.rb [new file with mode: 0644]
apps/workbench/test/performance/browsing_test.rb
apps/workbench/test/test_helper.rb
apps/workbench/test/unit/arvados_base_test.rb [new file with mode: 0644]
apps/workbench/test/unit/pipeline_instance_test.rb
backports/python-ciso8601/fpm-info.sh [new file with mode: 0644]
backports/python-llfuse/fpm-info.sh [new file with mode: 0644]
backports/python-pycrypto/fpm-info.sh [new file with mode: 0644]
backports/python-pycurl/fpm-info.sh [new file with mode: 0644]
crunch_scripts/crunchutil/vwd.py
crunch_scripts/run-command
doc/_config.yml
doc/_includes/_arv_copy_expectations.liquid [new file with mode: 0644]
doc/_includes/_arv_run_redirection.liquid
doc/_includes/_install_debian_key.liquid [new file with mode: 0644]
doc/_includes/_install_git.liquid [new file with mode: 0644]
doc/_includes/_install_git_curl.liquid [new file with mode: 0644]
doc/_includes/_install_postgres.liquid [new file with mode: 0644]
doc/_includes/_install_redhat_key.liquid [new file with mode: 0644]
doc/_includes/_install_ruby_and_bundler.liquid [new file with mode: 0644]
doc/_includes/_navbar_top.liquid
doc/_includes/_note_python27_sc.liquid [new file with mode: 0644]
doc/_includes/_ssh_addkey.liquid
doc/_includes/_tutorial_bwa_sortsam_pipeline.liquid
doc/_includes/_tutorial_cluster_name.liquid [new file with mode: 0644]
doc/_includes/_tutorial_expectations.liquid
doc/_includes/_tutorial_expectations_workstation.liquid [new file with mode: 0644]
doc/_includes/_tutorial_git_repo_expectations.liquid [new file with mode: 0644]
doc/_includes/_tutorial_submit_job.liquid
doc/_layouts/default.html.liquid
doc/api/methods/jobs.html.textile.liquid
doc/api/schema/Job.html.textile.liquid
doc/api/schema/PipelineTemplate.html.textile.liquid
doc/api/schema/Repository.html.textile.liquid
doc/api/schema/User.html.textile.liquid
doc/css/button-override.css [new file with mode: 0644]
doc/images/add-new-repository.png [new file with mode: 0644]
doc/images/added-new-repository.png [new file with mode: 0644]
doc/images/api-token-host.png [new file with mode: 0644]
doc/images/publicproject/collection-files.png [new file with mode: 0644]
doc/images/publicproject/collection-graph.png [new file with mode: 0644]
doc/images/publicproject/collection-show.png [new file with mode: 0644]
doc/images/publicproject/collections.png [new file with mode: 0644]
doc/images/publicproject/description.png [new file with mode: 0644]
doc/images/publicproject/instance-advanced.png [new file with mode: 0644]
doc/images/publicproject/instance-components.png [new file with mode: 0644]
doc/images/publicproject/instance-graph.png [new file with mode: 0644]
doc/images/publicproject/instance-job.png [new file with mode: 0644]
doc/images/publicproject/instance-log.png [new file with mode: 0644]
doc/images/publicproject/instance-show.png [new file with mode: 0644]
doc/images/publicproject/instances.png [new file with mode: 0644]
doc/images/repositories-panel.png [new file with mode: 0644]
doc/images/vm-access-with-webshell.png [new file with mode: 0644]
doc/index.html.liquid
doc/install/copy_pipeline_from_curoverse.html.textile.liquid [new file with mode: 0644]
doc/install/create-standard-objects.html.textile.liquid
doc/install/index.html.textile.liquid
doc/install/install-api-server.html.textile.liquid
doc/install/install-arv-git-httpd.html.textile.liquid [new file with mode: 0644]
doc/install/install-compute-node.html.textile.liquid [new file with mode: 0644]
doc/install/install-crunch-dispatch.html.textile.liquid
doc/install/install-docker.html.textile.liquid
doc/install/install-keepproxy.html.textile.liquid
doc/install/install-keepstore.html.textile.liquid
doc/install/install-manual-prerequisites-ruby.html.textile.liquid [deleted file]
doc/install/install-manual-prerequisites.html.textile.liquid
doc/install/install-shell-server.html.textile.liquid
doc/install/install-sso.html.textile.liquid
doc/install/install-workbench-app.html.textile.liquid
doc/install/pre-built-docker.html.textile.liquid
doc/sdk/cli/install.html.textile.liquid
doc/sdk/cli/subcommands.html.textile.liquid
doc/sdk/perl/index.html.textile.liquid
doc/sdk/python/sdk-python.html.textile.liquid
doc/start/getting_started/publicproject.html.textile.liquid [new file with mode: 0644]
doc/user/getting_started/check-environment.html.textile.liquid
doc/user/getting_started/community.html.textile.liquid
doc/user/getting_started/ssh-access-unix.html.textile.liquid
doc/user/getting_started/ssh-access-windows.html.textile.liquid
doc/user/getting_started/vm-login-with-webshell.html.textile.liquid [new file with mode: 0644]
doc/user/getting_started/workbench.html.textile.liquid
doc/user/index.html.textile.liquid
doc/user/reference/api-tokens.html.textile.liquid
doc/user/topics/arv-copy.html.textile.liquid [new file with mode: 0644]
doc/user/topics/arv-docker.html.textile.liquid
doc/user/topics/arv-run.html.textile.liquid
doc/user/topics/arv-web.html.textile.liquid
doc/user/topics/run-command.html.textile.liquid
doc/user/topics/running-pipeline-command-line.html.textile.liquid
doc/user/topics/tutorial-job1.html.textile.liquid
doc/user/topics/tutorial-parallel.html.textile.liquid
doc/user/tutorials/add-new-repository.html.textile.liquid [new file with mode: 0644]
doc/user/tutorials/git-arvados-guide.html.textile.liquid [new file with mode: 0644]
doc/user/tutorials/running-external-program.html.textile.liquid
doc/user/tutorials/tutorial-firstscript.html.textile.liquid
doc/user/tutorials/tutorial-keep-get.html.textile.liquid
doc/user/tutorials/tutorial-keep-mount.html.textile.liquid
doc/user/tutorials/tutorial-keep.html.textile.liquid
doc/user/tutorials/tutorial-pipeline-workbench.html.textile.liquid
doc/user/tutorials/tutorial-submit-job.html.textile.liquid
docker/README.md
docker/api/.gitolite.rc [new file with mode: 0644]
docker/api/Dockerfile
docker/api/apache2_vhost.in
docker/api/application.yml.in
docker/api/arvados-clients.yml.in
docker/api/keep_proxy.json [new file with mode: 0644]
docker/api/keep_server_0.json.in [moved from docker/api/keep_server_0.json with 59% similarity]
docker/api/keep_server_1.json.in [moved from docker/api/keep_server_1.json with 59% similarity]
docker/api/munge.sh [new file with mode: 0755]
docker/api/omniauth.rb.in
docker/api/passenger.sh [new file with mode: 0755]
docker/api/setup-gitolite.sh.in
docker/api/setup.sh.in
docker/api/supervisor.conf
docker/api/update-gitolite.rb [deleted file]
docker/arvdock
docker/base/Dockerfile
docker/build_tools/Makefile
docker/build_tools/build.rb
docker/compute/Dockerfile
docker/compute/munge.sh [new file with mode: 0755]
docker/compute/setup.sh.in
docker/compute/supervisor.conf
docker/config.yml.example
docker/doc/Dockerfile
docker/doc/apache2_vhost.in [moved from docker/doc/apache2_vhost with 62% similarity]
docker/java-bwa-samtools/Dockerfile
docker/jobs/Dockerfile
docker/jobs/apt.arvados.org.list [new file with mode: 0644]
docker/keep/Dockerfile
docker/keep/run-keep.in
docker/keepproxy/Dockerfile [new file with mode: 0644]
docker/keepproxy/run-keepproxy.in [new file with mode: 0755]
docker/mkimage-debootstrap.sh
docker/passenger/Dockerfile
docker/postgresql/Dockerfile
docker/shell/Dockerfile
docker/slurm/Dockerfile
docker/sso/Dockerfile
docker/sso/apache2_vhost.in
docker/sso/application.yml.in [new file with mode: 0644]
docker/sso/database.yml.in [new file with mode: 0644]
docker/sso/secret_token.rb.in [deleted file]
docker/workbench/Dockerfile
docker/workbench/apache2_foreground.sh
docker/workbench/apache2_vhost.in
docker/workbench/application.yml.in
sdk/cli/bin/arv
sdk/cli/bin/arv-tag
sdk/cli/bin/crunch-job
sdk/cli/test/binstub_clean_fail/mount [new file with mode: 0755]
sdk/cli/test/binstub_docker_noop/docker.io [new file with mode: 0755]
sdk/cli/test/binstub_sanity_check/docker.io [new file with mode: 0755]
sdk/cli/test/binstub_sanity_check/true [new file with mode: 0755]
sdk/cli/test/test_arv-collection-create.rb
sdk/cli/test/test_arv-get.rb
sdk/cli/test/test_arv-put.rb
sdk/cli/test/test_arv-run-pipeline-instance.rb
sdk/cli/test/test_arv-tag.rb
sdk/cli/test/test_crunch-job.rb [new file with mode: 0644]
sdk/cwl/.gitignore [new symlink]
sdk/cwl/README.rst [new file with mode: 0644]
sdk/cwl/arvados_cwl/__init__.py [new file with mode: 0644]
sdk/cwl/bin/cwl-runner [new file with mode: 0755]
sdk/cwl/gittaggers.py [new symlink]
sdk/cwl/setup.py [new file with mode: 0644]
sdk/go/arvadosclient/arvadosclient.go
sdk/go/arvadosclient/arvadosclient_test.go
sdk/go/arvadosclient/pool.go [new file with mode: 0644]
sdk/go/auth/auth.go [new file with mode: 0644]
sdk/go/auth/basic_auth_go13.go [new file with mode: 0644]
sdk/go/auth/basic_auth_go14.go [new file with mode: 0644]
sdk/go/auth/basic_auth_test.go [new file with mode: 0644]
sdk/go/blockdigest/blockdigest.go
sdk/go/blockdigest/blockdigest_test.go
sdk/go/blockdigest/testing.go [new file with mode: 0644]
sdk/go/httpserver/httpserver.go [new file with mode: 0644]
sdk/go/httpserver/log.go [new file with mode: 0644]
sdk/go/httpserver/responsewriter.go [new file with mode: 0644]
sdk/go/keepclient/hashcheck.go
sdk/go/keepclient/keepclient.go
sdk/go/keepclient/keepclient_test.go
sdk/go/keepclient/root_sorter.go
sdk/go/keepclient/root_sorter_test.go
sdk/go/keepclient/support.go
sdk/go/logger/logger.go
sdk/go/logger/main/testlogger.go [deleted file]
sdk/go/logger/util.go [new file with mode: 0644]
sdk/go/manifest/manifest.go
sdk/go/manifest/manifest_test.go
sdk/java/pom.xml
sdk/java/src/main/java/org/arvados/sdk/Arvados.java [moved from sdk/java/src/main/java/org/arvados/sdk/java/Arvados.java with 96% similarity]
sdk/java/src/main/java/org/arvados/sdk/MethodDetails.java [moved from sdk/java/src/main/java/org/arvados/sdk/java/MethodDetails.java with 94% similarity]
sdk/pam/.dockerignore [new file with mode: 0644]
sdk/pam/.gitignore [new symlink]
sdk/pam/Dockerfile [new file with mode: 0644]
sdk/pam/MANIFEST.in [new file with mode: 0644]
sdk/pam/README.rst [new file with mode: 0644]
sdk/pam/arvados_pam/__init__.py [new file with mode: 0644]
sdk/pam/arvados_pam/auth_event.py [new file with mode: 0644]
sdk/pam/examples/shellinabox [new file with mode: 0644]
sdk/pam/fpm-info.sh [new file with mode: 0644]
sdk/pam/gittaggers.py [new symlink]
sdk/pam/integration_tests/__init__.py [new file with mode: 0644]
sdk/pam/integration_tests/test_pam.py [new file with mode: 0644]
sdk/pam/lib/libpam_arvados.py [new file with mode: 0644]
sdk/pam/pam-configs/arvados [new file with mode: 0644]
sdk/pam/setup.py [new file with mode: 0755]
sdk/pam/tests/__init__.py [new file with mode: 0644]
sdk/pam/tests/integration_test.pl [new file with mode: 0755]
sdk/pam/tests/mocker.py [new file with mode: 0644]
sdk/pam/tests/test_auth_event.py [new file with mode: 0644]
sdk/pam/tests/test_pam_sm.py [new file with mode: 0644]
sdk/perl/Makefile.PL
sdk/python/arvados/_ranges.py
sdk/python/arvados/api.py
sdk/python/arvados/arvfile.py
sdk/python/arvados/collection.py
sdk/python/arvados/commands/arv_copy.py
sdk/python/arvados/commands/keepdocker.py
sdk/python/arvados/commands/put.py
sdk/python/arvados/commands/run.py
sdk/python/arvados/commands/ws.py
sdk/python/arvados/errors.py
sdk/python/arvados/events.py
sdk/python/arvados/keep.py
sdk/python/arvados/retry.py
sdk/python/arvados/stream.py
sdk/python/arvados/util.py
sdk/python/bin/arv-get
sdk/python/setup.py
sdk/python/tests/arvados_testutil.py
sdk/python/tests/keepstub.py [new file with mode: 0644]
sdk/python/tests/manifest_examples.py [new file with mode: 0644]
sdk/python/tests/nginx.conf [new file with mode: 0644]
sdk/python/tests/performance/__init__.py [new file with mode: 0644]
sdk/python/tests/performance/performance_profiler.py [new file with mode: 0644]
sdk/python/tests/performance/test_a_sample.py [new file with mode: 0644]
sdk/python/tests/run_test_server.py
sdk/python/tests/test_api.py
sdk/python/tests/test_arv_put.py
sdk/python/tests/test_arv_ws.py [new file with mode: 0644]
sdk/python/tests/test_arvfile.py
sdk/python/tests/test_benchmark_collections.py [new file with mode: 0644]
sdk/python/tests/test_collections.py
sdk/python/tests/test_errors.py
sdk/python/tests/test_keep_client.py
sdk/python/tests/test_retry.py
sdk/python/tests/test_stream.py
sdk/python/tests/test_util.py
sdk/python/tests/test_websockets.py
sdk/ruby/arvados.gemspec
sdk/ruby/lib/arvados/collection.rb
sdk/ruby/lib/arvados/keep.rb
sdk/ruby/test/sdk_fixtures.rb
sdk/ruby/test/test_collection.rb
sdk/ruby/test/test_keep_manifest.rb
services/api/Gemfile
services/api/Gemfile.lock
services/api/Rakefile
services/api/app/controllers/application_controller.rb
services/api/app/controllers/arvados/v1/collections_controller.rb
services/api/app/controllers/arvados/v1/jobs_controller.rb
services/api/app/controllers/arvados/v1/repositories_controller.rb
services/api/app/controllers/arvados/v1/schema_controller.rb
services/api/app/controllers/arvados/v1/users_controller.rb
services/api/app/controllers/arvados/v1/virtual_machines_controller.rb
services/api/app/controllers/user_sessions_controller.rb
services/api/app/mailers/user_notifier.rb
services/api/app/models/arvados_model.rb
services/api/app/models/authorized_key.rb
services/api/app/models/blob.rb
services/api/app/models/collection.rb
services/api/app/models/commit.rb
services/api/app/models/job.rb
services/api/app/models/keep_service.rb
services/api/app/models/node.rb
services/api/app/models/repository.rb
services/api/app/models/user.rb
services/api/app/views/static/login_failure.html.erb
services/api/app/views/user_notifier/account_is_setup.text.erb
services/api/config/application.default.yml
services/api/config/application.yml.example
services/api/config/database.yml.example [moved from services/api/config/database.yml.sample with 86% similarity]
services/api/config/initializers/hardcoded_api_tokens.rb.example [deleted file]
services/api/config/initializers/load_config.rb [moved from services/api/config/initializers/zz_load_config.rb with 56% similarity]
services/api/config/initializers/omniauth.rb.example [deleted file]
services/api/config/initializers/omniauth_init.rb [new file with mode: 0644]
services/api/config/initializers/preload_all_models.rb [moved from services/api/config/initializers/zz_preload_all_models.rb with 55% similarity]
services/api/db/migrate/20150317132720_add_username_to_users.rb [new file with mode: 0644]
services/api/db/migrate/20150324152204_backward_compatibility_for_user_repositories.rb [new file with mode: 0644]
services/api/db/migrate/20150423145759_no_filenames_in_collection_search_index.rb [new file with mode: 0644]
services/api/db/migrate/20150512193020_read_only_on_keep_services.rb [new file with mode: 0644]
services/api/db/migrate/20150526180251_leading_space_on_full_text_index.rb [new file with mode: 0644]
services/api/db/structure.sql
services/api/lib/eventbus.rb
services/api/lib/salvage_collection.rb [new file with mode: 0755]
services/api/lib/tasks/delete_old_job_logs.rake [new file with mode: 0644]
services/api/lib/tasks/test_tasks.rake [new file with mode: 0644]
services/api/script/arvados-git-sync.rb [new file with mode: 0755]
services/api/script/crunch-dispatch.rb
services/api/script/migrate-gitolite-to-uuid-storage.rb [new file with mode: 0755]
services/api/script/salvage_collection.rb [new file with mode: 0755]
services/api/test/fixtures/api_client_authorizations.yml
services/api/test/fixtures/collections.yml
services/api/test/fixtures/groups.yml
services/api/test/fixtures/humans.yml [new file with mode: 0644]
services/api/test/fixtures/jobs.yml
services/api/test/fixtures/links.yml
services/api/test/fixtures/logs.yml
services/api/test/fixtures/nodes.yml
services/api/test/fixtures/pipeline_instances.yml
services/api/test/fixtures/pipeline_templates.yml
services/api/test/fixtures/repositories.yml
services/api/test/fixtures/users.yml
services/api/test/functional/arvados/v1/collections_controller_test.rb
services/api/test/functional/arvados/v1/commits_controller_test.rb
services/api/test/functional/arvados/v1/job_reuse_controller_test.rb
services/api/test/functional/arvados/v1/jobs_controller_test.rb
services/api/test/functional/arvados/v1/links_controller_test.rb
services/api/test/functional/arvados/v1/nodes_controller_test.rb
services/api/test/functional/arvados/v1/repositories_controller_test.rb
services/api/test/functional/arvados/v1/users_controller_test.rb
services/api/test/functional/arvados/v1/virtual_machines_controller_test.rb
services/api/test/helpers/git_test_helper.rb
services/api/test/helpers/manifest_examples.rb [new file with mode: 0644]
services/api/test/helpers/time_block.rb [new file with mode: 0644]
services/api/test/integration/collections_performance_test.rb [new file with mode: 0644]
services/api/test/integration/crunch_dispatch_test.rb
services/api/test/integration/serialized_encoding_test.rb
services/api/test/integration/users_test.rb
services/api/test/integration/websocket_test.rb
services/api/test/tasks/delete_old_job_logs_test.rb [new file with mode: 0644]
services/api/test/test.git.tar
services/api/test/test_helper.rb
services/api/test/unit/arvados_model_test.rb
services/api/test/unit/authorized_key_test.rb
services/api/test/unit/collection_performance_test.rb [new file with mode: 0644]
services/api/test/unit/collection_test.rb
services/api/test/unit/commit_test.rb
services/api/test/unit/job_test.rb
services/api/test/unit/node_test.rb
services/api/test/unit/permission_test.rb
services/api/test/unit/repository_test.rb
services/api/test/unit/salvage_collection_test.rb [new file with mode: 0644]
services/api/test/unit/user_notifier_test.rb
services/api/test/unit/user_test.rb
services/arv-git-httpd/.gitignore [new file with mode: 0644]
services/arv-git-httpd/auth_handler.go [new file with mode: 0644]
services/arv-git-httpd/doc.go [new file with mode: 0644]
services/arv-git-httpd/git_handler.go [new file with mode: 0644]
services/arv-git-httpd/git_handler_test.go [new file with mode: 0644]
services/arv-git-httpd/main.go [new file with mode: 0644]
services/arv-git-httpd/server.go [new file with mode: 0644]
services/arv-git-httpd/server_test.go [new file with mode: 0644]
services/crunchstat/.gitignore [new file with mode: 0644]
services/crunchstat/crunchstat.go
services/datamanager/collection/collection.go
services/datamanager/collection/collection_test.go [new file with mode: 0644]
services/datamanager/collection/testing.go [new file with mode: 0644]
services/datamanager/datamanager.go
services/datamanager/keep/keep.go
services/datamanager/keep/keep_test.go [new file with mode: 0644]
services/datamanager/loggerutil/loggerutil.go
services/datamanager/summary/canonical_string.go [new file with mode: 0644]
services/datamanager/summary/file.go [new file with mode: 0644]
services/datamanager/summary/pull_list.go [new file with mode: 0644]
services/datamanager/summary/pull_list_test.go [new file with mode: 0644]
services/datamanager/summary/summary.go [new file with mode: 0644]
services/datamanager/summary/summary_test.go [new file with mode: 0644]
services/datamanager/summary/trash_list.go [new file with mode: 0644]
services/datamanager/summary/trash_list_test.go [new file with mode: 0644]
services/dockercleaner/.gitignore [new symlink]
services/dockercleaner/arvados_docker/__init__.py [new file with mode: 0644]
services/dockercleaner/arvados_docker/cleaner.py [new file with mode: 0755]
services/dockercleaner/gittaggers.py [new symlink]
services/dockercleaner/setup.py [new file with mode: 0644]
services/dockercleaner/tests/__init__.py [new file with mode: 0644]
services/dockercleaner/tests/test_cleaner.py [new file with mode: 0644]
services/fuse/README.rst
services/fuse/arvados_fuse/__init__.py
services/fuse/arvados_fuse/fresh.py [new file with mode: 0644]
services/fuse/arvados_fuse/fusedir.py [new file with mode: 0644]
services/fuse/arvados_fuse/fusefile.py [new file with mode: 0644]
services/fuse/bin/arv-mount
services/fuse/setup.py
services/fuse/tests/fstest.py [new file with mode: 0644]
services/fuse/tests/mount_test_base.py [new file with mode: 0644]
services/fuse/tests/performance/__init__.py [new file with mode: 0644]
services/fuse/tests/performance/performance_profiler.py [new symlink]
services/fuse/tests/performance/test_collection_performance.py [new file with mode: 0644]
services/fuse/tests/prof.py [new file with mode: 0644]
services/fuse/tests/test_inodes.py [new file with mode: 0644]
services/fuse/tests/test_mount.py
services/keepproxy/.gitignore [new file with mode: 0644]
services/keepproxy/keepproxy.go
services/keepproxy/keepproxy_test.go
services/keepstore/.gitignore [new file with mode: 0644]
services/keepstore/bufferpool.go [new file with mode: 0644]
services/keepstore/bufferpool_test.go [new file with mode: 0644]
services/keepstore/handler_test.go
services/keepstore/handlers.go
services/keepstore/keepstore.go
services/keepstore/keepstore_test.go
services/keepstore/logging_router.go
services/keepstore/perms.go
services/keepstore/perms_test.go
services/keepstore/pull_worker.go
services/keepstore/pull_worker_integration_test.go
services/keepstore/pull_worker_test.go
services/keepstore/status_test.go [new file with mode: 0644]
services/keepstore/trash_worker.go [new file with mode: 0644]
services/keepstore/trash_worker_test.go [new file with mode: 0644]
services/keepstore/volume.go
services/keepstore/volume_test.go [new file with mode: 0644]
services/keepstore/volume_unix.go
services/keepstore/volume_unix_test.go
services/keepstore/work_queue.go
services/keepstore/work_queue_test.go
services/login-sync/.gitignore [new file with mode: 0644]
services/login-sync/Gemfile [new file with mode: 0644]
services/login-sync/Rakefile [new file with mode: 0644]
services/login-sync/arvados-login-sync.gemspec [new file with mode: 0644]
services/login-sync/bin/arvados-login-sync [new file with mode: 0755]
services/login-sync/test/binstub_new_user/useradd [new file with mode: 0755]
services/login-sync/test/stubs.rb [new file with mode: 0644]
services/login-sync/test/test_add_user.rb [new file with mode: 0644]
services/nodemanager/arvnodeman/computenode/dispatch/__init__.py
services/nodemanager/arvnodeman/computenode/dispatch/slurm.py
services/nodemanager/arvnodeman/computenode/driver/__init__.py
services/nodemanager/arvnodeman/computenode/driver/azure.py [new file with mode: 0644]
services/nodemanager/arvnodeman/computenode/driver/ec2.py
services/nodemanager/arvnodeman/computenode/driver/gce.py
services/nodemanager/arvnodeman/daemon.py
services/nodemanager/bin/arvados-node-manager [changed mode: 0644->0755]
services/nodemanager/doc/azure.example.cfg [new file with mode: 0644]
services/nodemanager/setup.py
services/nodemanager/tests/__init__.py
services/nodemanager/tests/test_computenode_dispatch.py
services/nodemanager/tests/test_computenode_dispatch_slurm.py
services/nodemanager/tests/test_computenode_driver_azure.py [new file with mode: 0644]
services/nodemanager/tests/test_computenode_driver_ec2.py
services/nodemanager/tests/test_computenode_driver_gce.py
services/nodemanager/tests/test_daemon.py
services/nodemanager/tests/testutil.py

index eec475862e6ec2a87554e0fca90697e87f441bf5..6cbcccc895805615751c9bd6d714d862359b19e1 100644 (file)
@@ -2,6 +2,9 @@
 .rvmrc
 *~
 *.pyc
+*.gem
+*.rpm
+*.deb
 docker/*/generated
 docker/config.yml
 doc/.site
@@ -16,3 +19,5 @@ sdk/java/target
 *.class
 sdk/java/log
 /tmp
+sdk/python/tmp
+services/fuse/tmp
index b51f674d90f68bfb50d9304068f915e42b04aea4..e35cc83ece303aec8b84c4e43e9a15f0258ba7d4 100644 (file)
@@ -1,10 +1,9 @@
 source 'https://rubygems.org'
 
 gem 'rails', '~> 4.1.0'
-gem 'arvados', '>= 0.1.20150313191637'
-
-gem 'sqlite3'
+gem 'arvados', '>= 0.1.20150511150219'
 
+gem 'activerecord-nulldb-adapter'
 gem 'multi_json'
 gem 'oj'
 gem 'sass'
@@ -88,7 +87,7 @@ gem 'piwik_analytics'
 gem 'httpclient', '~> 2.5'
 
 # This fork has Rails 4 compatible routes
-gem 'themes_for_rails', git: 'https://github.com/holtkampw/themes_for_rails', ref: '1fd2d7897d75ae0d6375f4c390df87b8e91ad417'
+gem 'themes_for_rails', git: 'https://github.com/curoverse/themes_for_rails'
 
 gem "deep_merge", :require => 'deep_merge/rails_compat'
 
index 19b2857358fe208a52e50dcf50953897856f2da1..20b8d6164ccca273e11756928a21c1a17851f07a 100644 (file)
@@ -1,7 +1,6 @@
 GIT
-  remote: https://github.com/holtkampw/themes_for_rails
-  revision: 1fd2d7897d75ae0d6375f4c390df87b8e91ad417
-  ref: 1fd2d7897d75ae0d6375f4c390df87b8e91ad417
+  remote: https://github.com/curoverse/themes_for_rails
+  revision: 61154877047d2346890bda0b7be5827cf51a6a76
   specs:
     themes_for_rails (0.5.1)
       rails (>= 3.0.0)
@@ -10,27 +9,29 @@ GEM
   remote: https://rubygems.org/
   specs:
     RedCloth (4.2.9)
-    actionmailer (4.1.9)
-      actionpack (= 4.1.9)
-      actionview (= 4.1.9)
+    actionmailer (4.1.12)
+      actionpack (= 4.1.12)
+      actionview (= 4.1.12)
       mail (~> 2.5, >= 2.5.4)
-    actionpack (4.1.9)
-      actionview (= 4.1.9)
-      activesupport (= 4.1.9)
+    actionpack (4.1.12)
+      actionview (= 4.1.12)
+      activesupport (= 4.1.12)
       rack (~> 1.5.2)
       rack-test (~> 0.6.2)
-    actionview (4.1.9)
-      activesupport (= 4.1.9)
+    actionview (4.1.12)
+      activesupport (= 4.1.12)
       builder (~> 3.1)
       erubis (~> 2.7.0)
-    activemodel (4.1.9)
-      activesupport (= 4.1.9)
+    activemodel (4.1.12)
+      activesupport (= 4.1.12)
       builder (~> 3.1)
-    activerecord (4.1.9)
-      activemodel (= 4.1.9)
-      activesupport (= 4.1.9)
+    activerecord (4.1.12)
+      activemodel (= 4.1.12)
+      activesupport (= 4.1.12)
       arel (~> 5.0.0)
-    activesupport (4.1.9)
+    activerecord-nulldb-adapter (0.3.1)
+      activerecord (>= 2.0.0)
+    activesupport (4.1.12)
       i18n (~> 0.6, >= 0.6.9)
       json (~> 1.7, >= 1.7.7)
       minitest (~> 5.1)
@@ -40,7 +41,7 @@ GEM
     andand (1.3.3)
     angularjs-rails (1.3.8)
     arel (5.0.1.20140414130214)
-    arvados (0.1.20150313191637)
+    arvados (0.1.20150511150219)
       activesupport (>= 3.2.13)
       andand (~> 1.3, >= 1.3.3)
       google-api-client (~> 0.6.3, >= 0.6.3)
@@ -112,13 +113,12 @@ GEM
       uuidtools (>= 2.1.0)
     headless (1.0.2)
     highline (1.6.21)
-    hike (1.2.3)
     httpclient (2.6.0.1)
     i18n (0.7.0)
     jquery-rails (3.1.2)
       railties (>= 3.0, < 5.0)
       thor (>= 0.14, < 2.0)
-    json (1.8.2)
+    json (1.8.3)
     jwt (0.1.13)
       multi_json (>= 1.5)
     launchy (2.4.3)
@@ -132,14 +132,14 @@ GEM
     mail (2.6.3)
       mime-types (>= 1.16, < 3)
     metaclass (0.0.4)
-    mime-types (2.4.3)
+    mime-types (2.6.1)
     mini_portile (0.6.2)
-    minitest (5.5.1)
+    minitest (5.7.0)
     mocha (1.1.0)
       metaclass (~> 0.0.1)
     morrisjs-rails (0.5.1)
       railties (> 3.1, < 5)
-    multi_json (1.10.1)
+    multi_json (1.11.1)
     multipart-post (1.2.0)
     net-scp (1.2.1)
       net-ssh (>= 2.6.5)
@@ -164,25 +164,25 @@ GEM
       cliver (~> 0.3.1)
       multi_json (~> 1.0)
       websocket-driver (>= 0.2.0)
-    rack (1.5.2)
+    rack (1.5.5)
     rack-mini-profiler (0.9.2)
       rack (>= 1.1.3)
     rack-test (0.6.3)
       rack (>= 1.0)
-    rails (4.1.9)
-      actionmailer (= 4.1.9)
-      actionpack (= 4.1.9)
-      actionview (= 4.1.9)
-      activemodel (= 4.1.9)
-      activerecord (= 4.1.9)
-      activesupport (= 4.1.9)
+    rails (4.1.12)
+      actionmailer (= 4.1.12)
+      actionpack (= 4.1.12)
+      actionview (= 4.1.12)
+      activemodel (= 4.1.12)
+      activerecord (= 4.1.12)
+      activesupport (= 4.1.12)
       bundler (>= 1.3.0, < 2.0)
-      railties (= 4.1.9)
+      railties (= 4.1.12)
       sprockets-rails (~> 2.0)
     rails-perftest (0.0.5)
-    railties (4.1.9)
-      actionpack (= 4.1.9)
-      activesupport (= 4.1.9)
+    railties (4.1.12)
+      actionpack (= 4.1.12)
+      activesupport (= 4.1.12)
       rake (>= 0.8.7)
       thor (>= 0.18.1, < 2.0)
     rake (10.4.2)
@@ -220,22 +220,18 @@ GEM
     simplecov-rcov (0.2.3)
       simplecov (>= 0.4.1)
     slop (3.6.0)
-    sprockets (2.12.3)
-      hike (~> 1.2)
-      multi_json (~> 1.0)
+    sprockets (3.2.0)
       rack (~> 1.0)
-      tilt (~> 1.1, != 1.3.0)
-    sprockets-rails (2.2.2)
+    sprockets-rails (2.3.2)
       actionpack (>= 3.0)
       activesupport (>= 3.0)
       sprockets (>= 2.8, < 4.0)
-    sqlite3 (1.3.10)
     sshkey (1.6.1)
     therubyracer (0.12.1)
       libv8 (~> 3.16.14.0)
       ref
     thor (0.19.1)
-    thread_safe (0.3.4)
+    thread_safe (0.3.5)
     tilt (1.4.1)
     tzinfo (1.2.2)
       thread_safe (~> 0.1)
@@ -256,9 +252,10 @@ PLATFORMS
 
 DEPENDENCIES
   RedCloth
+  activerecord-nulldb-adapter
   andand
   angularjs-rails
-  arvados (>= 0.1.20150313191637)
+  arvados (>= 0.1.20150511150219)
   bootstrap-sass (~> 3.1.0)
   bootstrap-tab-history-rails
   bootstrap-x-editable-rails
@@ -292,7 +289,6 @@ DEPENDENCIES
   selenium-webdriver
   simplecov (~> 0.7)
   simplecov-rcov
-  sqlite3
   sshkey
   themes_for_rails!
   therubyracer
diff --git a/apps/workbench/app/assets/images/mouse-move.gif b/apps/workbench/app/assets/images/mouse-move.gif
new file mode 100644 (file)
index 0000000..497b159
Binary files /dev/null and b/apps/workbench/app/assets/images/mouse-move.gif differ
diff --git a/apps/workbench/app/assets/images/pipeline-running.gif b/apps/workbench/app/assets/images/pipeline-running.gif
new file mode 100644 (file)
index 0000000..64e9009
Binary files /dev/null and b/apps/workbench/app/assets/images/pipeline-running.gif differ
diff --git a/apps/workbench/app/assets/javascripts/add_repository.js b/apps/workbench/app/assets/javascripts/add_repository.js
new file mode 100644 (file)
index 0000000..9594f9c
--- /dev/null
@@ -0,0 +1,38 @@
+$(document).on('shown.bs.modal', '#add-repository-modal', function(event) {
+    $('input[type=text]', event.target).val('');
+    $('#add-repository-error', event.target).hide();
+}).on('submit', '#add-repository-form', function(event) {
+    var $form = $(event.target),
+    $submit = $(':submit', $form),
+    $error = $('#add-repository-error', $form),
+    repo_owner_uuid = $('input[name="add_repo_owner_uuid"]', $form).val(),
+    repo_prefix = $('input[name="add_repo_prefix"]', $form).val(),
+    repo_basename = $('input[name="add_repo_basename"]', $form).val();
+
+    $submit.prop('disabled', true);
+    $error.hide();
+    $.ajax('/repositories',
+           {method: 'POST',
+            dataType: 'json',
+            data: {repository: {owner_uuid: repo_owner_uuid,
+                                name: repo_prefix + repo_basename}},
+            context: $form}).
+        done(function(data, status, jqxhr) {
+            location.reload();
+        }).
+        fail(function(jqxhr, status, error) {
+            var errlist = jqxhr.responseJSON.errors;
+            var errmsg;
+            if (Array.isArray(errlist)) {
+                errmsg = errlist.join();
+            } else {
+                errmsg = ("The server returned an error when making " +
+                          "this repository (status " + jqxhr.status +
+                          ": " + errlist + ").");
+            }
+            $error.text(errmsg);
+            $error.show();
+            $submit.prop('disabled', false);
+        });
+    return false;
+});
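
For context, a minimal sketch of the modal markup this handler expects. The element IDs, input names, and the /repositories endpoint come from the script above; the Bootstrap wrapper and the placeholder values are assumptions:

    // Assumed markup for add_repository.js: the IDs and input names match
    // the selectors above; the owner uuid and prefix values are placeholders.
    $('body').append(
        '<div class="modal" id="add-repository-modal" tabindex="-1">' +
        '  <form id="add-repository-form">' +
        '    <div id="add-repository-error" class="alert alert-danger" style="display: none"></div>' +
        '    <input type="hidden" name="add_repo_owner_uuid" value="zzzzz-tpzed-0000000000000000">' +
        '    <input type="hidden" name="add_repo_prefix" value="example/">' +
        '    <input type="text" name="add_repo_basename" placeholder="Repository name">' +
        '    <input type="submit" value="Create">' +
        '  </form>' +
        '</div>');
    // Opening the modal fires shown.bs.modal (clearing text inputs and hiding
    // the error div); submitting the form POSTs to /repositories as JSON.
    $('#add-repository-modal').modal('show');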
index 172ff873e83dd72acd92fdc7977118126da09443..65e856df3a90143a5955def3698acc89784ff438 100644 (file)
@@ -145,6 +145,12 @@ jQuery(function($){
         on('ready ajax:complete', function() {
             // This makes the dialog close on Esc key, obviously.
             $('.modal').attr('tabindex', '-1')
+        }).
+        on('ready', function() {
+            // Need this to trigger input validation/synchronization callbacks because some browsers
+            // auto-fill form fields (e.g., when navigating "back" to a page where some text
+            // had been entered in a search box) without triggering a change or input event.
+            $('input').trigger('input');
         });
 
     HeaderRowFixer = function(selector) {
index 81a3a4639b8c7f63a2b42a416252664d746a6b78..047858c5a0e3a9811408de40442f24605994868d 100644 (file)
@@ -1,3 +1,37 @@
+// infinite_scroll.js loads more of a tab's content automatically when the
+// user scrolls to the bottom of the page and more data is available.
+//
+// Usage:
+//
+// 1. Adding infinite scrolling to a tab pane using the "show" method
+//
+//  The steps below describe adding scrolling to the project#show action.
+//
+//  a. In the "app/views/projects/" folder add a file for your tab
+//      (e.g., _show_jobs_and_pipelines.html.erb).
+//    In this file, add a div or tbody with data-infinite-scroller.
+//      Note: This page uses _show_tab_contents.html.erb so that
+//            several tabs can reuse this implementation.
+//    Also add the filters to be used for loading the tab content.
+//
+//  b. Add a file named "_show_contents_rows.html.erb" that loads
+//    the data (by invoking get_objects_and_names from the controller).
+//
+//  c. In "app/controllers/projects_controller.rb", update the show
+//    method to add a block for "params[:partial]"
+//      that loads the show_contents_rows partial.
+//    Optionally, add a "tab_counts" method that loads the total
+//      object count to be displayed for this tab.
+//
+// 2. Adding infinite scrolling to the "Recent" tab of an "index" page
+//  The steps below describe adding scrolling to the pipeline_instances index page.
+//
+//  a. In the "app/views/pipeline_instances/_show_recent.html.erb/" file
+//      add a div or tbody with data-infinite-scroller.
+//
+//  b. Add the partial "_show_recent_rows.html.erb" that displays the
+//      page contents on scroll using @objects.
+
 function maybe_load_more_content(event) {
     var scroller = this;
     var $container = $(event.data.container);
@@ -38,7 +72,7 @@ function maybe_load_more_content(event) {
         }
         $container.find(".spinner").detach();
         $container.append(spinner);
-        $container.attr('data-infinite-serial', serial);
+        $container.data('data-infinite-serial', serial);
 
         if (src == $container.attr('data-infinite-content-href0')) {
             // If we're loading the first page, collect filters from
@@ -69,12 +103,12 @@ function maybe_load_more_content(event) {
             fail(function(jqxhr, status, error) {
                 var $faildiv;
                 var $container = this.container;
-                if ($container.attr('data-infinite-serial') != this.serial) {
+                if ($container.data('data-infinite-serial') != this.serial) {
                     // A newer request is already in progress.
                     return;
                 }
                 if (jqxhr.readyState == 0 || jqxhr.status == 0) {
-                    message = "Cancelled."
+                    message = "Cancelled.";
                 } else if (jqxhr.responseJSON && jqxhr.responseJSON.errors) {
                     message = jqxhr.responseJSON.errors.join("; ");
                 } else {
@@ -89,7 +123,7 @@ function maybe_load_more_content(event) {
                 $container.find('div.spinner').replaceWith($faildiv);
             }).
             done(function(data, status, jqxhr) {
-                if ($container.attr('data-infinite-serial') != this.serial) {
+                if ($container.data('data-infinite-serial') != this.serial) {
                     // A newer request is already in progress.
                     return;
                 }
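
One detail of the serial change above is easy to miss: jQuery's .data() writes to an internal cache rather than back to the DOM, so the serial is kept as a native value and the data-infinite-serial attribute is no longer rewritten on every request. A standalone sketch (the element here is illustrative):

    // .data(key, value) stores in jQuery's cache; the DOM attribute is untouched.
    var $container = $('<div data-infinite-serial="0"></div>');
    $container.data('data-infinite-serial', Date.now());
    $container.attr('data-infinite-serial');  // still "0" (a string, from the DOM)
    $container.data('data-infinite-serial');  // the number stored above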
diff --git a/apps/workbench/app/assets/javascripts/modal_pager.js b/apps/workbench/app/assets/javascripts/modal_pager.js
new file mode 100644 (file)
index 0000000..58cf7e4
--- /dev/null
@@ -0,0 +1,44 @@
+// Usage:
+//
+// 1. Add some buttons to your modal, one with class="pager-next" and
+// one with class="pager-prev".
+//
+// 2. Put multiple .modal-body sections in your modal.
+//
+// 3. Add a "pager-count" div where page count is shown.
+// For example: "1 of 10" when showing the first of 10 pages.
+
+$(document).on('click', '.modal .pager-next', function() {
+    var $modal = $(this).parents('.modal');
+    $modal.data('page', ($modal.data('page') || 0) + 1).trigger('pager:render');
+    return false;
+}).on('click', '.modal .pager-prev', function() {
+    var $modal = $(this).parents('.modal');
+    $modal.data('page', ($modal.data('page') || 1) - 1).trigger('pager:render');
+    return false;
+}).on('ready ajax:success', function() {
+    $('.modal').trigger('pager:render');
+}).on('pager:render', '.modal', function() {
+    var $modal = $(this);
+    var page = $modal.data('page') || 0;
+    var $panes = $('.modal-body', $modal);
+    if (page >= $panes.length) {
+        // Somehow moved past end
+        page = $panes.length - 1;
+        $modal.data('page', page);
+    } else if (page < 0) {
+        page = 0;
+    }
+
+    var $pager_count = $('.pager-count', $modal);
+    $pager_count.text((page+1) + " of " + $panes.length);
+
+    var selected = $panes.hide().eq(page).show();
+    enableButton($('.pager-prev', $modal), page > 0);
+    enableButton($('.pager-next', $modal), page < $panes.length - 1);
+    function enableButton(btn, ok) {
+        btn.prop('disabled', !ok).
+            toggleClass('btn-primary', ok).
+            toggleClass('btn-default', !ok);
+    }
+});
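
A usage sketch for the pager (the wizard markup is assumed; the class names are the ones modal_pager.js binds to above):

    // Two .modal-body panes plus pager controls: modal_pager.js hides every
    // pane except the current page and keeps the buttons and counter in sync.
    $('body').append(
        '<div class="modal" id="example-wizard">' +
        '  <div class="modal-body">Step one</div>' +
        '  <div class="modal-body">Step two</div>' +
        '  <div class="modal-footer">' +
        '    <span class="pager-count"></span>' +
        '    <button class="btn pager-prev">Back</button>' +
        '    <button class="btn pager-next">Next</button>' +
        '  </div>' +
        '</div>');
    // Renders page 0: shows "1 of 2", disables Back, enables Next.
    $('#example-wizard').trigger('pager:render');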
index e820ba978ec3dddf07dcec83f08fb9573d6a7727..8bb25c13c080138641e6e8ed1a22124a585c44ae 100644 (file)
@@ -1,6 +1,6 @@
 function run_pipeline_button_state() {
     var a = $('a.editable.required.editable-empty,input.form-control.required[value=""]');
-    if (a.length > 0) {
+    if ((a.length > 0) || ($('.unreadable-inputs-present').length)) {
         $(".run-pipeline-button").addClass("disabled");
     }
     else {
@@ -44,6 +44,17 @@ $(document).on('ready ajax:complete', function() {
             $tag.parent().prev().css("background-color", "");
         }
     });
+    $('input.required').each(function() {
+        var $tag = $(this);
+        if ($tag.hasClass("unreadable-input")) {
+            $tag.parent().parent().css("background-color", "#ffdddd");
+            $tag.parent().parent().prev().css("background-color", "#ffdddd");
+        }
+        else {
+            $tag.parent().parent().css("background-color", "");
+            $tag.parent().parent().prev().css("background-color", "");
+        }
+    });
     run_pipeline_button_state();
 });
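
To illustrate the new check, any element carrying the unreadable-inputs-present class is enough to disable the Run button. A sketch (the #pipeline-inputs container is hypothetical; the class names come from the code above):

    // Mark an input as unreadable, then re-run the state check.
    $('#pipeline-inputs').append(
        '<span class="unreadable-inputs-present">' +
        '<input class="required unreadable-input" value="">' +
        '</span>');
    run_pipeline_button_state();  // .run-pipeline-button gains "disabled"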
 
index d9ad7f8c397e8d5726ba41ac4932935e750f046e..17b334eb643438631eb35c3b8ffa31d04d9c2d30 100644 (file)
@@ -99,8 +99,9 @@ $(document).on('click', '.selectable', function() {
                 message = "Request failed.";
             }
             this.modal.find('.modal-error').
-                html('<div class="alert alert-danger">' + message + '</div>').
-                show();
+                html('<div class="alert alert-danger"></div>').
+                show().
+                children().text(message);
         }).
         done(function(data, status, jqxhr) {
             var event_name = this.action_data.success;
index 55df78697c59112b50e4b0d61f753dc56f775453..5c69c50c119b5dd62c930b3b2144c109812d6871 100644 (file)
@@ -86,3 +86,11 @@ $(document).
             on('click', dispatch_selection_action);
         $(this).trigger('selections-updated');
     });
+
+function select_all_items() {
+  $(".arv-selectable-items :checkbox").filter(":visible").prop("checked", true).trigger("change");
+}
+
+function unselect_all_items() {
+  $(".arv-selectable-items :checkbox").filter(":visible").prop("checked", false).trigger("change");
+}
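
A sketch of wiring the new helpers to toolbar buttons (the button classes here are hypothetical; note the checkboxes must sit inside an element with the arv-selectable-items class for the selectors above to match):

    $(document).on('click', '.select-all-button', function() {
        select_all_items();
    }).on('click', '.unselect-all-button', function() {
        unselect_all_items();
    });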
index 0b38dbc16f78e71c1b4b4e7c3c5e7c01bc3c5f96..ddc5576d432da19010f320b623a3721437bc9d31 100644 (file)
@@ -133,7 +133,23 @@ $(document).on('arv:pane:reload', '[data-pane-content-url]', function(e) {
             var $pane = this;
             var errhtml;
             var contentType = jqxhr.getResponseHeader('Content-Type');
-            if (contentType && contentType.match(/\btext\/html\b/)) {
+            if (jqxhr.readyState == 0 || jqxhr.status == 0) {
+                if ($pane.attr('data-loaded-at') > 0) {
+                    // Stale content is already present. Leave it
+                    // there while loading the next page.
+                    $pane.removeClass('pane-loading');
+                    $pane.addClass('pane-loaded');
+                    // ...but schedule another refresh (after a
+                    // throttle delay) in case the act of navigating
+                    // away gets cancelled itself, leaving this page
+                    // with content that we know is stale.
+                    $pane.addClass('pane-stale');
+                    $pane.attr('data-loaded-at', (new Date()).getTime());
+                    $pane.trigger('arv:pane:reload');
+                    return;
+                }
+                errhtml = "Cancelled.";
+            } else if (contentType && contentType.match(/\btext\/html\b/)) {
                 var $response = $(jqxhr.responseText);
                 var $wrapper = $('div#page-wrapper', $response);
                 if ($wrapper.length) {
index ee1c7dda37af86e7eb1719791a29052920b6673d..481f46f72112499e0957fdc79db27563db22dc99 100644 (file)
@@ -29,7 +29,7 @@ $(document).
            if (response.errors) {
                error_div.html($('<p/>').text(response.errors).html());
            } else {
-               error_div.html('<p>Sorry, request failed.');
+               error_div.html('<p>Sorry, request failed.</p>');
            }
            error_div.show();
            $($('input[name=disable_element]', e.target).val()).
index ec7eee44b72d3954600a79365d9f4eaa2896e56a..a09de69c1f0916854bf2179191d8077731c8bc9d 100644 (file)
@@ -219,6 +219,10 @@ table.table-fixed-header-row tbody {
     overflow-y: auto;
 }
 
+.dropdown-menu a {
+    cursor: pointer;
+}
+
 .row-fill-height, .row-fill-height>div[class*='col-'] {
     display: flex;
 }
@@ -296,3 +300,15 @@ span.editable-textile {
 ul.nav.nav-tabs {
     font-size: 90%
 }
+
+.hover-dropdown:hover .dropdown-menu {
+  display: block;
+}
+
+.arv-description-as-subtitle .editable-inline,
+.arv-description-as-subtitle .editable-inline .form-group,
+.arv-description-as-subtitle .editable-inline .form-group .editable-input,
+.arv-description-as-subtitle .editable-inline .form-group .editable-input textarea
+{
+    width: 98%!important;
+}
index 7737a3cfe4abdc8cded0159ac3bb1d5e13527768..58b8cdc54f018e6dae20ba7b9c182bfbaef909c0 100644 (file)
@@ -1,5 +1,15 @@
+require "arvados/collection"
+
 class ActionsController < ApplicationController
 
+  # Skip require_thread_api_token if this is a show action
+  # for an object uuid that supports anonymous access.
+  skip_around_filter :require_thread_api_token, if: proc { |ctrl|
+    Rails.configuration.anonymous_user_token and
+    'show' == ctrl.action_name and
+    params['uuid'] and
+    model_class.in?([Collection, Group, Job, PipelineInstance, PipelineTemplate])
+  }
   skip_filter :require_thread_api_token, only: [:report_issue_popup, :report_issue]
   skip_filter :check_user_agreements, only: [:report_issue_popup, :report_issue]
 
@@ -19,6 +29,8 @@ class ActionsController < ApplicationController
         @object.link_class == 'name' and
         ArvadosBase::resource_class_for_uuid(@object.head_uuid) == Collection
       redirect_to collection_path(id: @object.uuid)
+    elsif @object.is_a?(Group) and @object.group_class == 'project'
+      redirect_to project_path(id: @object.uuid)
     elsif @object
       redirect_to @object
     else
@@ -89,7 +101,8 @@ class ActionsController < ApplicationController
     end
     if (resource_classes == [Collection] and
         @object.is_a? Group and
-        @object.group_class == 'project')
+        @object.group_class == 'project') or
+        @object.is_a? User
       # In the common case where only collections are copied/moved
       # into a project, it's polite to land on the collections tab on
       # the destination project.
@@ -100,141 +113,113 @@ class ActionsController < ApplicationController
     end
   end
 
-  def arv_normalize mt, *opts
-    r = ""
-    env = Hash[ENV].
-      merge({'ARVADOS_API_HOST' =>
-              arvados_api_client.arvados_v1_base.
-              sub(/\/arvados\/v1/, '').
-              sub(/^https?:\/\//, ''),
-              'ARVADOS_API_TOKEN' => 'x',
-              'ARVADOS_API_HOST_INSECURE' =>
-              Rails.configuration.arvados_insecure_https ? 'true' : 'false'
-            })
-    IO.popen([env, 'arv-normalize'] + opts, 'w+b') do |io|
-      io.write mt
-      io.close_write
-      while buf = io.read(2**16)
-        r += buf
-      end
+  expose_action :combine_selected_files_into_collection do
+    link_uuids, coll_ids = params["selection"].partition do |sel_s|
+      ArvadosBase::resource_class_for_uuid(sel_s) == Link
     end
-    r
-  end
 
-  expose_action :combine_selected_files_into_collection do
-    uuids = []
-    pdhs = []
-    files = []
-    params["selection"].each do |s|
-      a = ArvadosBase::resource_class_for_uuid s
-      if a == Link
-        begin
-          if (m = CollectionsHelper.match(Link.find(s).head_uuid))
-            pdhs.append(m[1] + m[2])
-            files.append(m)
-          end
-        rescue
+    unless link_uuids.empty?
+      Link.select([:head_uuid]).where(uuid: link_uuids).each do |link|
+        if ArvadosBase::resource_class_for_uuid(link.head_uuid) == Collection
+          coll_ids << link.head_uuid
         end
-      elsif (m = CollectionsHelper.match(s))
-        pdhs.append(m[1] + m[2])
-        files.append(m)
-      elsif (m = CollectionsHelper.match_uuid_with_optional_filepath(s))
-        uuids.append(m[1])
-        files.append(m)
       end
     end
 
-    pdhs = pdhs.uniq
-    uuids = uuids.uniq
-    chash = {}
-
-    Collection.select([:uuid, :manifest_text]).where(uuid: uuids).each do |c|
-      chash[c.uuid] = c
+    uuids = []
+    pdhs = []
+    source_paths = Hash.new { |hash, key| hash[key] = [] }
+    coll_ids.each do |coll_id|
+      if m = CollectionsHelper.match(coll_id)
+        key = m[1] + m[2]
+        pdhs << key
+        source_paths[key] << m[4]
+      elsif m = CollectionsHelper.match_uuid_with_optional_filepath(coll_id)
+        key = m[1]
+        uuids << key
+        source_paths[key] << m[4]
+      end
     end
 
-    Collection.select([:portable_data_hash, :manifest_text]).where(portable_data_hash: pdhs).each do |c|
-      chash[c.portable_data_hash] = c
+    unless pdhs.empty?
+      Collection.where(portable_data_hash: pdhs.uniq).
+          select([:uuid, :portable_data_hash]).each do |coll|
+        unless source_paths[coll.portable_data_hash].empty?
+          uuids << coll.uuid
+          source_paths[coll.uuid] = source_paths.delete(coll.portable_data_hash)
+        end
+      end
     end
 
-    combined = ""
-    files_in_dirs = {}
-    files.each do |m|
-      mt = chash[m[1]+m[2]].andand.manifest_text
-      if not m[4].nil? and m[4].size > 1
-        manifest_files = files_in_dirs['.']
-        if !manifest_files
-          manifest_files = []
-          files_in_dirs['.'] = manifest_files
-        end
-        manifest_file = m[4].split('/')[-1]
-        uniq_file = derive_unique_filename(manifest_file, manifest_files)
-        normalized = arv_normalize mt, '--extract', ".#{m[4]}"
-        normalized = normalized.gsub(/(\d+:\d+:)(#{Regexp.quote manifest_file})/) {|s| "#{$1}#{uniq_file}" }
-        combined += normalized
-        manifest_files << uniq_file
+    new_coll = Arv::Collection.new
+    Collection.where(uuid: uuids.uniq).
+        select([:uuid, :manifest_text]).each do |coll|
+      src_coll = Arv::Collection.new(coll.manifest_text)
+      src_pathlist = source_paths[coll.uuid]
+      if src_pathlist.any?(&:blank?)
+        src_pathlist = src_coll.each_file_path
+        destdir = nil
       else
-        mt = arv_normalize mt
-        manifest_streams = mt.split "\n"
-        adjusted_streams = []
-        manifest_streams.each do |stream|
-          manifest_parts = stream.split
-          adjusted_parts = []
-          manifest_files = files_in_dirs[manifest_parts[0]]
-          if !manifest_files
-            manifest_files = []
-            files_in_dirs[manifest_parts[0]] = manifest_files
-          end
-
-          manifest_parts.each do |part|
-            part_match = /(\d+:\d+:)(\S+)/.match(part)
-            if part_match
-              uniq_file = derive_unique_filename(part_match[2], manifest_files)
-              adjusted_parts << "#{part_match[1]}#{uniq_file}" 
-              manifest_files << uniq_file
-            else
-              adjusted_parts << part
-            end
-          end
-          adjusted_streams << adjusted_parts.join(' ')
+        destdir = "."
+      end
+      src_pathlist.each do |src_path|
+        src_path = src_path.sub(/^(\.\/|\/|)/, "./")
+        src_stream, _, basename = src_path.rpartition("/")
+        dst_stream = destdir || src_stream
+        # Generate a unique destination name by appending (1), (2),
+        # etc. to the basename.  If the filename has a dot that's not
+        # at the beginning, insert the number just before that dot;
+        # otherwise, append the number to the end of the name.
+        if match = basename.match(/[^\.]\./)
+          suffix_start = match.begin(0) + 1
+        else
+          suffix_start = basename.size
         end
-        adjusted_streams.each do |stream|
-          combined += (stream + "\n")
+        suffix_size = 0
+        dst_path = nil
+        loop.each_with_index do |_, try_count|
+          dst_path = "#{dst_stream}/#{basename}"
+          break unless new_coll.exist?(dst_path)
+          uniq_suffix = "(#{try_count + 1})"
+          basename[suffix_start, suffix_size] = uniq_suffix
+          suffix_size = uniq_suffix.size
         end
+        new_coll.cp_r(src_path, dst_path, src_coll)
       end
     end
 
-    normalized = arv_normalize combined
-    newc = Collection.new({:manifest_text => normalized})
-    newc.name = newc.name || "Collection created at #{Time.now.localtime}"
+    coll_attrs = {
+      manifest_text: new_coll.manifest_text,
+      name: "Collection created at #{Time.now.localtime}",
+    }
+    flash = {}
 
     # set owner_uuid to current project, provided it is writable
-    current_project_writable = false
-    action_data = JSON.parse(params['action_data']) if params['action_data']
-    if action_data && action_data['current_project_uuid']
-      current_project = Group.find(action_data['current_project_uuid']) rescue nil
-      if (current_project && current_project.writable_by.andand.include?(current_user.uuid))
-        newc.owner_uuid = action_data['current_project_uuid']
-        current_project_writable = true
-      end
+    action_data = Oj.load(params['action_data'] || "{}")
+    if action_data['current_project_uuid'] and
+        current_project = Group.find?(action_data['current_project_uuid']) and
+        current_project.writable_by.andand.include?(current_user.uuid)
+      coll_attrs[:owner_uuid] = current_project.uuid
+      flash[:message] =
+        "Created new collection in the project #{current_project.name}."
+    else
+      flash[:message] = "Created new collection in your Home project."
     end
 
-    newc.save!
-
-    chash.each do |k,v|
-      l = Link.new({
-                     tail_uuid: k,
-                     head_uuid: newc.uuid,
-                     link_class: "provenance",
-                     name: "provided"
-                   })
-      l.save!
+    newc = Collection.create!(coll_attrs)
+    source_paths.each_key do |src_uuid|
+      unless Link.create({
+                           tail_uuid: src_uuid,
+                           head_uuid: newc.uuid,
+                           link_class: "provenance",
+                           name: "provided",
+                         })
+        flash[:error] = "
+An error occurred when saving provenance information for this collection.
+You can try recreating the collection to get a copy with full provenance data."
+        break
+      end
     end
-
-    msg = current_project_writable ?
-              "Created new collection in the project #{current_project.name}." :
-              "Created new collection in your Home project."
-
-    redirect_to newc, flash: {'message' => msg}
+    redirect_to(newc, flash: flash)
   end
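
The renaming scheme above is worth seeing in isolation. Below is a plain-Ruby sketch of the same suffix logic, with a Set standing in for the Arv::Collection#exist? check; the uniquify helper name is illustrative, not part of Workbench.

    require "set"

    # Return a name unique within `taken`, derived from `basename` by
    # inserting "(1)", "(2)", ... just before the first interior dot,
    # or at the end if there is no such dot.
    def uniquify(basename, taken)
      name = basename.dup
      if match = name.match(/[^\.]\./)
        suffix_start = match.begin(0) + 1
      else
        suffix_start = name.size
      end
      suffix_size = 0
      try_count = 0
      while taken.include?(name)
        try_count += 1
        uniq_suffix = "(#{try_count})"
        name[suffix_start, suffix_size] = uniq_suffix
        suffix_size = uniq_suffix.size
      end
      name
    end

    taken = Set.new
    %w(reads.fastq reads.fastq reads.fastq .profile .profile).each do |f|
      taken << (u = uniquify(f, taken))
      puts u
    end
    # => reads.fastq, reads(1).fastq, reads(2).fastq, .profile, .profile(1)
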
 
   def report_issue_popup
index 1b59c574b774a897372a7f771dc4cdeda5f45df2..db3d43040c416bef846a19d8ee0b4009a9e8f622 100644 (file)
@@ -707,6 +707,7 @@ class ApplicationController < ActionController::Base
   @@notification_tests = []
 
   @@notification_tests.push lambda { |controller, current_user|
+    return nil if Rails.configuration.shell_in_a_box_url
     AuthorizedKey.limit(1).where(authorized_user_uuid: current_user.uuid).each do
       return nil
     end
@@ -1068,6 +1069,39 @@ class ApplicationController < ActionController::Base
     @all_log_collections_for
   end
 
+  # Helper method to get one collection for the given portable_data_hash
+  # This is used to determine if a pdh is readable by the current_user
+  helper_method :collection_for_pdh
+  def collection_for_pdh pdh
+    raise ArgumentError, 'No input argument' unless pdh
+    preload_for_pdhs([pdh])
+    @all_pdhs_for[pdh] ||= []
+  end
+
+  # Helper method to preload one collection each for the given pdhs
+  # This is used to determine if a pdh is readable by the current_user
+  helper_method :preload_for_pdhs
+  def preload_for_pdhs pdhs
+    @all_pdhs_for ||= {}
+
+    raise ArgumentError, 'Argument is not an array' unless pdhs.is_a? Array
+    return @all_pdhs_for if pdhs.empty?
+
+    # if already preloaded for all of these pdhs, return
+    if not pdhs.select { |x| @all_pdhs_for[x].nil? }.any?
+      return @all_pdhs_for
+    end
+
+    pdhs.each do |x|
+      @all_pdhs_for[x] = []
+    end
+
+    Collection.select(%w(portable_data_hash)).where(portable_data_hash: pdhs).distinct().each do |collection|
+      @all_pdhs_for[collection.portable_data_hash] << collection
+    end
+    @all_pdhs_for
+  end
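
The preload seeds every requested PDH with an empty result before overwriting the ones the API returns, so a later collection_for_pdh call never re-queries an unreadable PDH. A generic sketch of this negative-caching preload pattern (PreloadCache and fetch_rows are illustrative names, not Workbench API):

    class PreloadCache
      def initialize(&fetch_rows)
        @fetch_rows = fetch_rows   # stand-in for the batched Collection query
        @cache = {}
      end

      # Return the cached rows for key, batch-loading on first use.
      def [](key)
        preload([key])
        @cache[key]
      end

      def preload(keys)
        missing = keys.reject { |k| @cache.key?(k) }
        return if missing.empty?
        # Seed every requested key first, so absent/unreadable keys are
        # remembered as "looked up, nothing found" and never re-queried.
        missing.each { |k| @cache[k] = [] }
        @fetch_rows.call(missing).each { |key, row| @cache[key] << row }
      end
    end

    rows = { "pdh1" => "collection-1" }   # stand-in for API data
    cache = PreloadCache.new do |keys|
      keys.map { |k| [k, rows[k]] }.reject { |_, v| v.nil? }
    end
    cache.preload(%w(pdh1 pdh2))
    p cache["pdh1"]  # => ["collection-1"]
    p cache["pdh2"]  # => [] (cached negative; no second lookup)
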
+
   # helper method to get object of a given dataclass and uuid
   helper_method :object_for_dataclass
   def object_for_dataclass dataclass, uuid
@@ -1087,10 +1121,14 @@ class ApplicationController < ActionController::Base
     return @objects_for if uuids.empty?
 
     # if already preloaded for all of these uuids, return
-    if not uuids.select { |x| @objects_for[x].nil? }.any?
+    if not uuids.select { |x| !@objects_for.include?(x) }.any?
       return @objects_for
     end
 
+    # preset all uuids to nil
+    uuids.each do |x|
+      @objects_for[x] = nil
+    end
     dataclass.where(uuid: uuids).each do |obj|
       @objects_for[obj.uuid] = obj
     end
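
The switch from a nil check to include? is what makes the nil-presetting above work: a key cached as nil must count as already looked up. In plain Ruby:

    cache = {}
    cache["missing-uuid"] = nil      # looked up before, object not readable

    # Wrong test: treats the cached negative result as "never fetched",
    # so the API would be queried again on every call.
    p cache["missing-uuid"].nil?       # => true

    # Right test: distinguishes "cached nil" from "not yet looked up".
    p cache.include?("missing-uuid")   # => true
    p cache.include?("other-uuid")     # => false
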
index e883017070d20ccdc7613e8f6c88ea9acaf1930e..e01151ca408b567ec9908349bdfd7a51bcd15a2f 100644 (file)
@@ -119,10 +119,14 @@ class CollectionsController < ApplicationController
     # we ask the API server if the file actually exists.  This serves two
     # purposes: it lets us return a useful status code for common errors, and
     # helps us figure out which token to provide to arv-get.
+    # The order of searched tokens is important: because the anonymous user
+    # token is passed along with every API request, we have to check it first.
+    # Otherwise, it's impossible to know whether any other request succeeded
+    # because of the reader token.
     coll = nil
-    tokens = [Thread.current[:arvados_api_token],
+    tokens = [(Rails.configuration.anonymous_user_token || nil),
               params[:reader_token],
-              (Rails.configuration.anonymous_user_token || nil)].compact
+              Thread.current[:arvados_api_token]].compact
     usable_token = find_usable_token(tokens) do
       coll = Collection.find(params[:uuid])
     end
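
find_usable_token (defined under protected later in this file) tries each candidate in order and returns the first token for which the block succeeds, so this list order decides which token gets credited. A rough sketch of that loop shape; the real helper also installs the token for the request and distinguishes not-found from permission errors:

    class NotAuthorized < StandardError; end

    # Return the first token for which the block succeeds, nil if none work.
    def find_usable_token(tokens)
      tokens.each do |token|
        begin
          yield token
          return token
        rescue NotAuthorized
          next
        end
      end
      nil
    end

    usable = find_usable_token(%w(anon-token reader-token user-token)) do |t|
      raise NotAuthorized unless t == "reader-token"
    end
    p usable  # => "reader-token"
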
@@ -198,7 +202,7 @@ class CollectionsController < ApplicationController
 
     if current_user
       if Keep::Locator.parse params["uuid"]
-        @same_pdh = Collection.filter([["portable_data_hash", "=", @object.portable_data_hash]])
+        @same_pdh = Collection.filter([["portable_data_hash", "=", @object.portable_data_hash]]).limit(20)
         if @same_pdh.results.size == 1
           redirect_to collection_path(@same_pdh[0]["uuid"])
           return
@@ -206,6 +210,8 @@ class CollectionsController < ApplicationController
         owners = @same_pdh.map(&:owner_uuid).to_a.uniq
         preload_objects_for_dataclass Group, owners
         preload_objects_for_dataclass User, owners
+        uuids = @same_pdh.map(&:uuid).to_a.uniq
+        preload_links_for_objects uuids
         render 'hash_matches'
         return
       else
@@ -224,6 +230,7 @@ class CollectionsController < ApplicationController
           .where(head_uuid: @object.uuid, link_class: 'permission',
                  name: 'can_read').results
         @logs = Log.limit(RELATION_LIMIT).order("created_at DESC")
+          .select(%w(uuid event_type object_uuid event_at summary))
           .where(object_uuid: @object.uuid).results
         @is_persistent = Link.limit(1)
           .where(head_uuid: @object.uuid, tail_uuid: current_user.uuid,
@@ -266,15 +273,6 @@ class CollectionsController < ApplicationController
     sharing_popup
   end
 
-  def update
-    @updates ||= params[@object.resource_param_name.to_sym]
-    if @updates && (@updates.keys - ["name", "description"]).empty?
-      # exclude manifest_text since only name or description is being updated
-      @object.manifest_text = nil
-    end
-    super
-  end
-
   protected
 
   def find_usable_token(token_list)
index 7edf8cc30d542a69489589b52b3c05d4abf0e270..398417734c71c34f2aaac71fbf700eaf4d5f50d1 100644 (file)
@@ -4,8 +4,6 @@ class JobsController < ApplicationController
     'show' == ctrl.action_name
   }
 
-  include JobsHelper
-
   def generate_provenance(jobs)
     return if params['tab_pane'] != "Provenance"
 
index b4cce9be03e42bd2899590101a671717deb6295b..c5fbda0cf349177801a0bcbbd75c7c95634b56ef 100644 (file)
@@ -284,6 +284,62 @@ class PipelineInstancesController < ApplicationController
     %w(Compare Graph)
   end
 
+  helper_method :unreadable_inputs_present?
+  def unreadable_inputs_present?
+    unless @unreadable_inputs_present.nil?
+      return @unreadable_inputs_present
+    end
+
+    input_uuids = []
+    input_pdhs = []
+    @object.components.each do |k, component|
+      next if !component
+      component[:script_parameters].andand.each do |p, tv|
+        if (tv.is_a? Hash) and ((tv[:dataclass] == "Collection") || (tv[:dataclass] == "File"))
+          if tv[:value]
+            value = tv[:value]
+          elsif tv[:default]
+            value = tv[:default]
+          else
+            value = ''
+          end
+          if value.present?
+            split = value.split '/'
+            if CollectionsHelper.match(split[0])
+              input_pdhs << split[0]
+            else
+              input_uuids << split[0]
+            end
+          end
+        end
+      end
+    end
+
+    input_pdhs = input_pdhs.uniq
+    input_uuids = input_uuids.uniq
+
+    preload_collections_for_objects input_uuids if input_uuids.any?
+    preload_for_pdhs input_pdhs if input_pdhs.any?
+
+    @unreadable_inputs_present = false
+    input_uuids.each do |uuid|
+      if !collections_for_object(uuid).any?
+        @unreadable_inputs_present = true
+        break
+      end
+    end
+    if !@unreadable_inputs_present
+      input_pdhs.each do |pdh|
+        if !collection_for_pdh(pdh).any?
+          @unreadable_inputs_present = true
+          break
+        end
+      end
+    end
+
+    @unreadable_inputs_present
+  end
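
The classification above keys off the identifier's shape: a portable data hash looks like <md5>+<size>, while a collection UUID has the 5-part Arvados form. A stand-alone sketch of the split, using regexes that only approximate CollectionsHelper.match and the real UUID pattern:

    # Rough patterns, for illustration only: a PDH is "<32 hex chars>+<size>",
    # a collection UUID is "xxxxx-4zz18-xxxxxxxxxxxxxxx".
    PDH_RE  = /\A[a-f0-9]{32}\+\d+/
    UUID_RE = /\A[0-9a-z]{5}-4zz18-[0-9a-z]{15}/

    def classify_input(value)
      head = value.split("/").first
      if head =~ PDH_RE
        [:pdh, head]
      elsif head =~ UUID_RE
        [:uuid, head]
      else
        [:other, head]
      end
    end

    p classify_input("d41d8cd98f00b204e9800998ecf8427e+0/input.fastq")
    # => [:pdh, "d41d8cd98f00b204e9800998ecf8427e+0"]
    p classify_input("qr1hi-4zz18-0123456789abcde/reads/1.fastq")
    # => [:uuid, "qr1hi-4zz18-0123456789abcde"]
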
+
   protected
   def for_comparison v
     if v.is_a? Hash or v.is_a? Array
index 3302771814eb3bc217f72c2e0aa5768e932dc65e..e49ed1fab65f38b6631c0298f8ba508feacd9087 100644 (file)
@@ -1,8 +1,8 @@
 class ProjectsController < ApplicationController
-  before_filter :set_share_links, if: -> { defined? @object }
+  before_filter :set_share_links, if: -> { defined? @object and @object }
   skip_around_filter :require_thread_api_token, if: proc { |ctrl|
     Rails.configuration.anonymous_user_token and
-    %w(show tab_counts).include? ctrl.action_name
+    %w(show tab_counts public).include? ctrl.action_name
   }
 
   def model_class
@@ -10,12 +10,22 @@ class ProjectsController < ApplicationController
   end
 
   def find_object_by_uuid
-    if current_user and params[:uuid] == current_user.uuid
-      @object = current_user.dup
-      @object.uuid = current_user.uuid
+    if (current_user and params[:uuid] == current_user.uuid) or
+       (resource_class_for_uuid(params[:uuid]) == User)
+      if params[:uuid] != current_user.uuid
+        @object = User.find(params[:uuid])
+      else
+        @object = current_user.dup
+        @object.uuid = current_user.uuid
+      end
+
       class << @object
         def name
-          'Home'
+          if current_user.uuid == self.uuid
+            'Home'
+          else
+            "Home for #{self.email}"
+          end
         end
         def description
           ''
@@ -65,7 +75,7 @@ class ProjectsController < ApplicationController
       {
         :name => 'Subprojects',
         :filters => [%w(uuid is_a arvados#group)]
-      } if current_user
+      }
     pane_list <<
       {
         :name => 'Other_objects',
@@ -136,7 +146,7 @@ class ProjectsController < ApplicationController
           item.update_attributes owner_uuid: current_user.uuid
           @removed_uuids << item.uuid
         rescue ArvadosApiClient::ApiErrorResponseException => e
-          if e.message.include? 'collection_owner_uuid_name_unique'
+          if e.message.include? '_owner_uuid_name_unique'
             rename_to = item.name + ' removed from ' +
                         (@object.name ? @object.name : @object.uuid) +
                         ' at ' + Time.now.to_s
@@ -174,7 +184,11 @@ class ProjectsController < ApplicationController
   end
 
   def find_objects_for_index
-    @objects = all_projects
+    # We can use the all_projects helper, but we have to dup the
+    # result -- otherwise, when we apply our per-request filters and
+    # limits, they will infect the @all_projects cache too (see
+    # #6640).
+    @objects = all_projects.dup
     super
   end
 
@@ -307,4 +321,11 @@ class ProjectsController < ApplicationController
     end
     objects_and_names
   end
+
+  def public  # Yes 'public' is the name of the action for public projects
+    return render_not_found if not Rails.configuration.anonymous_user_token or not Rails.configuration.enable_public_projects_page
+    @objects = using_specific_api_token Rails.configuration.anonymous_user_token do
+      Group.where(group_class: 'project').order("updated_at DESC")
+    end
+  end
 end
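
using_specific_api_token runs its block with a different API token in effect and restores the original afterwards. The core of that pattern is an ensure-guarded thread-local swap, roughly (the real helper also swaps the cached user and related state):

    # Run a block under a temporary thread-local credential.
    def with_api_token(token)
      saved = Thread.current[:arvados_api_token]
      Thread.current[:arvados_api_token] = token
      yield
    ensure
      Thread.current[:arvados_api_token] = saved
    end

    Thread.current[:arvados_api_token] = "user-token"
    with_api_token("anonymous-token") do
      p Thread.current[:arvados_api_token]  # => "anonymous-token"
    end
    p Thread.current[:arvados_api_token]    # => "user-token"
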
index d32c92a1e71fde336c99b52b990b86f019662af8..c5b3501b328e1214cc00c292c19f61d0be07312f 100644 (file)
@@ -16,4 +16,20 @@ class RepositoriesController < ApplicationController
     panes.delete('Attributes') if !current_user.is_admin
     panes
   end
+
+  def show_tree
+    @commit = params[:commit]
+    @path = params[:path] || ''
+    @subtree = @object.ls_subtree @commit, @path.chomp('/')
+  end
+
+  def show_blob
+    @commit = params[:commit]
+    @path = params[:path]
+    @blobdata = @object.cat_file @commit, @path
+  end
+
+  def show_commit
+    @commit = params[:commit]
+  end
 end
index 0ca5a85f018af48187865efe030195bbdeeebdbf..d2fcbbb94c7385def6753fb0feee877d9ad7f13d 100644 (file)
@@ -9,7 +9,11 @@ class UsersController < ApplicationController
     if params[:uuid] == current_user.uuid
       respond_to do |f|
         f.html do
-          redirect_to(params[:return_to] || project_path(params[:uuid]))
+          if request.url.include?("/users/#{current_user.uuid}")
+            super
+          else
+            redirect_to(params[:return_to] || project_path(params[:uuid]))
+          end
         end
       end
     else
@@ -52,15 +56,15 @@ class UsersController < ApplicationController
                1.month.ago.beginning_of_month,
                Time.now.beginning_of_month]]
     @spans.each do |span, threshold_start, threshold_end|
-      @activity[:logins][span] = Log.
+      @activity[:logins][span] = Log.select(%w(uuid modified_by_user_uuid)).
         filter([[:event_type, '=', 'login'],
                 [:object_kind, '=', 'arvados#user'],
                 [:created_at, '>=', threshold_start],
                 [:created_at, '<', threshold_end]])
-      @activity[:jobs][span] = Job.
+      @activity[:jobs][span] = Job.select(%w(uuid modified_by_user_uuid)).
         filter([[:created_at, '>=', threshold_start],
                 [:created_at, '<', threshold_end]])
-      @activity[:pipeline_instances][span] = PipelineInstance.
+      @activity[:pipeline_instances][span] = PipelineInstance.select(%w(uuid modified_by_user_uuid)).
         filter([[:created_at, '>=', threshold_start],
                 [:created_at, '<', threshold_end]])
       @activity.each do |type, act|
@@ -204,14 +208,32 @@ class UsersController < ApplicationController
         if params['openid_prefix'] && params['openid_prefix'].size>0
           setup_params[:openid_prefix] = params['openid_prefix']
         end
-        if params['repo_name'] && params['repo_name'].size>0
-          setup_params[:repo_name] = params['repo_name']
-        end
         if params['vm_uuid'] && params['vm_uuid'].size>0
           setup_params[:vm_uuid] = params['vm_uuid']
         end
 
-        if User.setup setup_params
+        setup_resp = User.setup setup_params
+        if setup_resp
+          vm_link = nil
+          setup_resp[:items].each do |item|
+            if item[:head_kind] == "arvados#virtualMachine"
+              vm_link = item
+              break
+            end
+          end
+          if params[:groups]
+            new_groups = params[:groups].split(',').map(&:strip).select{|i| !i.empty?}
+            if vm_link and new_groups != vm_link[:properties][:groups]
+              vm_login_link = Link.where(uuid: vm_link[:uuid])
+              if vm_login_link.items_available > 0
+                link = vm_login_link.results.first
+                props = link.properties
+                props[:groups] = new_groups
+                link.save!
+              end
+            end
+          end
+
           format.js
         else
           self.render_error status: 422
@@ -233,15 +255,14 @@ class UsersController < ApplicationController
     end
   end
 
-  def manage_account
-    # repositories current user can read / write
+  def repositories
     repo_links = Link.
       filter([['head_uuid', 'is_a', 'arvados#repository'],
               ['tail_uuid', '=', current_user.uuid],
               ['link_class', '=', 'permission'],
              ])
 
-    owned_repositories = Repository.where(owner_uuid: current_user.uuid)
+    owned_repositories = Repository.where(owner_uuid: @object.uuid)
 
     @my_repositories = (Repository.where(uuid: repo_links.collect(&:head_uuid)) |
                         owned_repositories).
@@ -258,10 +279,11 @@ class UsersController < ApplicationController
     owned_repositories.each do |repo|
       @repo_writable[repo.uuid] = 'can_manage'
     end
+  end
 
-    # virtual machines the current user can login into
+  def virtual_machines
     @my_vm_logins = {}
-    Link.where(tail_uuid: current_user.uuid,
+    Link.where(tail_uuid: @object.uuid,
                link_class: 'permission',
                name: 'can_login').
           each do |perm_link|
@@ -271,13 +293,10 @@ class UsersController < ApplicationController
             end
           end
     @my_virtual_machines = VirtualMachine.where(uuid: @my_vm_logins.keys)
+  end
 
-    # current user's ssh keys
-    @my_ssh_keys = AuthorizedKey.where(key_type: 'SSH', owner_uuid: current_user.uuid)
-
-    respond_to do |f|
-      f.html { render template: 'users/manage_account' }
-    end
+  def ssh_keys
+    @my_ssh_keys = AuthorizedKey.where(key_type: 'SSH', owner_uuid: @object.uuid)
   end
 
   def add_ssh_key_popup
@@ -359,8 +378,10 @@ class UsersController < ApplicationController
                               link_class: 'permission',
                               name: 'can_login')
     if vm_login_perms.any?
-      vm_uuid = vm_login_perms.first.head_uuid
+      vm_perm = vm_login_perms.first
+      vm_uuid = vm_perm.head_uuid
       current_selections[:vm_uuid] = vm_uuid
+      current_selections[:groups] = vm_perm.properties[:groups].andand.join(', ')
     end
 
     return current_selections
index a62ba81b0941cf46870ddc7f27bf3b97692fd172..73231f8cc09023ef13006fa6ad3e910d8881e874 100644 (file)
@@ -19,4 +19,14 @@ class VirtualMachinesController < ApplicationController
     end
     super
   end
+
+  def webshell
+    return render_not_found if not Rails.configuration.shell_in_a_box_url
+    @webshell_url = Rails.configuration.shell_in_a_box_url % {
+      uuid: @object.uuid,
+      hostname: @object.hostname,
+    }
+    render layout: false
+  end
+
 end
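
The % here is Ruby's format operator with named references: the configured shell_in_a_box_url template carries %{uuid} and %{hostname} placeholders that are filled from the hash. With an illustrative template:

    # Assuming a configuration value shaped like this (illustrative only):
    template = "https://shell.example.org/%{hostname}?uuid=%{uuid}"

    url = template % { uuid: "zzzzz-2x53u-0123456789abcde",
                       hostname: "shell.internal" }
    puts url
    # => https://shell.example.org/shell.internal?uuid=zzzzz-2x53u-0123456789abcde
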
index a80290cfa0a4433a7bd5707bbd9e277208c1f103..14b1c34d11e0d45821a1929c00254038c36ba50f 100644 (file)
@@ -165,7 +165,11 @@ module ApplicationHelper
       if opts[:no_link] or (resource_class == User && !current_user)
         raw(link_name)
       else
-        (link_to raw(link_name), { controller: resource_class.to_s.tableize, action: 'show', id: ((opts[:name_link].andand.uuid) || link_uuid) }, style_opts) + raw(tags)
+        controller_class = resource_class.to_s.tableize
+        if controller_class.eql?('groups') and object.andand.group_class.eql?('project')
+          controller_class = 'projects'
+        end
+        (link_to raw(link_name), { controller: controller_class, action: 'show', id: ((opts[:name_link].andand.uuid) || link_uuid) }, style_opts) + raw(tags)
       end
     else
       # just return attrvalue if it is not recognizable as an Arvados object or uuid.
@@ -177,7 +181,51 @@ module ApplicationHelper
     end
   end
 
-  def render_editable_attribute(object, attr, attrvalue=nil, htmloptions={})
+  def link_to_arvados_object_if_readable(attrvalue, link_text_if_not_readable, opts={})
+    resource_class = resource_class_for_uuid(attrvalue.split('/')[0]) if attrvalue.is_a?(String)
+    if !resource_class
+      return link_to_if_arvados_object attrvalue, opts
+    end
+
+    readable = object_readable attrvalue, resource_class
+    if readable
+      link_to_if_arvados_object attrvalue, opts
+    elsif opts[:required] and current_user # no need to show this for anonymous user
+      raw('<div><input type="text" style="border:none;width:100%;background:#ffdddd" disabled="disabled" class="required unreadable-input" value="') + link_text_if_not_readable + raw('"></div>')
+    else
+      link_text_if_not_readable
+    end
+  end
+
+  # This method takes advantage of preloaded collections and objects.
+  # You can therefore improve performance by preloading the objects
+  # related to the page context before calling it.
+  def object_readable attrvalue, resource_class=nil
+    # if it is a collection filename, check readable for the locator
+    attrvalue = attrvalue.split('/')[0] if attrvalue
+
+    resource_class = resource_class_for_uuid(attrvalue) if resource_class.nil?
+    return if resource_class.nil?
+
+    return_value = nil
+    if resource_class.to_s == 'Collection'
+      if CollectionsHelper.match(attrvalue)
+        found = collection_for_pdh(attrvalue)
+        return_value = found.first if found.any?
+      else
+        found = collections_for_object(attrvalue)
+        return_value = found.first if found.any?
+      end
+    else
+      return_value = object_for_dataclass(resource_class, attrvalue)
+    end
+    return_value
+  end
+
+  # Render an editable attribute with the attrvalue of the attr.
+  # The htmloptions are added to the editable element's list of attributes.
+  # The nonhtml_options are only used to customize the display of the element.
+  def render_editable_attribute(object, attr, attrvalue=nil, htmloptions={}, nonhtml_options={})
     attrvalue = object.send(attr) if attrvalue.nil?
     if not object.attribute_editable?(attr)
       if attrvalue && attrvalue.length > 0
@@ -229,11 +277,16 @@ module ApplicationHelper
       "id" => span_id,
       :class => "editable #{is_textile?( object, attr ) ? 'editable-textile' : ''}"
     }.merge(htmloptions).merge(ajax_options)
+
     edit_tiptitle = 'edit'
     edit_tiptitle = 'Warning: do not use hyphens in the repository name as they will be stripped' if (object.class.to_s == 'Repository' and attr == 'name')
-    edit_button = raw('<a href="#" class="btn btn-xs btn-default btn-nodecorate" data-toggle="x-editable tooltip" data-toggle-selector="#' + span_id + '" data-placement="top" title="' + (htmloptions[:tiptitle] || edit_tiptitle) + '"><i class="fa fa-fw fa-pencil"></i></a>')
-    if htmloptions[:btnplacement] == :left
+
+    edit_button = raw('<a href="#" class="btn btn-xs btn-' + (nonhtml_options[:btnclass] || 'default') + ' btn-nodecorate" data-toggle="x-editable tooltip" data-toggle-selector="#' + span_id + '" data-placement="top" title="' + (nonhtml_options[:tiptitle] || edit_tiptitle) + '"><i class="fa fa-fw fa-pencil"></i>' + (nonhtml_options[:btntext] || '') + '</a>')
+
+    if nonhtml_options[:btnplacement] == :left
       edit_button + ' ' + span_tag
+    elsif nonhtml_options[:btnplacement] == :top
+      edit_button + raw('<br/>') + span_tag
     else
       span_tag + ' ' + edit_button
     end
@@ -271,7 +324,7 @@ module ApplicationHelper
     end
 
     if not object.andand.attribute_editable?(attr)
-      return link_to_if_arvados_object attrvalue
+      return link_to_arvados_object_if_readable(attrvalue, attrvalue, {friendly_name: true, required: required})
     end
 
     if dataclass
@@ -323,10 +376,11 @@ module ApplicationHelper
            success: 'page-refresh'
          }.to_json,
         })
+
       return content_tag('div', :class => 'input-group') do
         html = text_field_tag(dn, display_value,
                               :class =>
-                              "form-control #{'required' if required}")
+                              "form-control #{'required' if required} #{'unreadable-input' if attrvalue.present? and !object_readable(attrvalue, Collection)}")
         html + content_tag('span', :class => 'input-group-btn') do
           link_to('Choose',
                   modal_path,
diff --git a/apps/workbench/app/helpers/jobs_helper.rb b/apps/workbench/app/helpers/jobs_helper.rb
deleted file mode 100644 (file)
index 06c3d0d..0000000
+++ /dev/null
@@ -1,22 +0,0 @@
-module JobsHelper
-  def stderr_log_history(job_uuids)
-    results = []
-
-    log_history = Log.where(event_type: 'stderr',
-                            object_uuid: job_uuids).order('id DESC')
-    if !log_history.results.empty?
-      reversed_results = log_history.results.reverse
-      reversed_results.each do |entry|
-        if entry.andand.properties
-          properties = entry.properties
-          text = properties[:text]
-          if text
-            results = results.concat text.split("\n")
-          end
-        end
-      end
-    end
-    return results
-  end
-
-end
index b0d5216efd1588069050d5b0d5aa371abc07492d..8fafbc2022d5873032d1f9565c2385a26f4a794b 100644 (file)
@@ -289,7 +289,7 @@ module PipelineInstancesHelper
     else
       s = ""
       if days > 0
-        s += "#{days}<span class='time-label-divider'>d</span> "
+        s += "#{days}<span class='time-label-divider'>d</span>"
       end
 
       if (hours > 0)
@@ -298,7 +298,7 @@ module PipelineInstancesHelper
 
       s += "#{minutes}<span class='time-label-divider'>m</span>"
 
-      if not round_to_min
+      if not round_to_min or (days == 0 and hours == 0 and minutes == 0)
         s += "#{seconds}<span class='time-label-divider'>s</span>"
       end
     end
@@ -306,4 +306,11 @@ module PipelineInstancesHelper
     raw(s)
   end
 
+  def render_unreadable_inputs_present
+    if current_user and controller.class.name.eql?('PipelineInstancesController') and unreadable_inputs_present?
+      raw('<div class="alert alert-danger unreadable-inputs-present">' +
+            '<p>One or more inputs provided are not readable by you. ' +
+              'Please correct these before you can run the pipeline.</p></div>')
+    end
+  end
 end
index 992f8fd896989a408b9939c16dc893b85aa58964..4d549d194728eb00a9f3a2a01fd097d84955a16e 100644 (file)
@@ -91,6 +91,9 @@ class ArvadosApiClient
           # Use system CA certificates
           @api_client.ssl_config.add_trust_ca('/etc/ssl/certs')
         end
+        if Rails.configuration.api_response_compression
+          @api_client.transparent_gzip_decompression = true
+        end
       end
     end
 
@@ -118,7 +121,7 @@ class ArvadosApiClient
         elsif v == false
           query[k] = 0
         else
-          query[k] = JSON.dump(v)
+          query[k] = Oj.dump(v, mode: :compat)
         end
       end
     else
@@ -131,7 +134,7 @@ class ArvadosApiClient
 
     header = {"Accept" => "application/json"}
 
-    profile_checkpoint { "Prepare request #{url} #{query[:uuid]} #{query[:where]} #{query[:filters]} #{query[:order]}" }
+    profile_checkpoint { "Prepare request #{query["_method"] or "POST"} #{url} #{query[:uuid]} #{query.inspect[0,256]}" }
     msg = @client_mtx.synchronize do
       begin
         @api_client.post(url, query, header: header)
@@ -140,6 +143,12 @@ class ArvadosApiClient
       end
     end
     profile_checkpoint 'API transaction'
+    if @@profiling_enabled
+      if msg.headers['X-Runtime']
+        Rails.logger.info "API server: #{msg.headers['X-Runtime']} runtime reported"
+      end
+      Rails.logger.info "Content-Encoding #{msg.headers['Content-Encoding'].inspect}, Content-Length #{msg.headers['Content-Length'].inspect}, actual content size #{msg.content.size}"
+    end
 
     begin
       resp = Oj.load(msg.content, :symbol_keys => true)
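
Oj's :compat mode matters here: it serializes plain Ruby data the way the stock JSON encoder would (symbol keys become strings, no Oj object marshalling), so it is a drop-in, faster replacement for the JSON.dump call it replaces. For example (requires the oj gem):

    require "oj"
    require "json"

    query = { filters: [["uuid", "=", "zzzzz-4zz18-0123456789abcde"]], limit: 1 }

    puts Oj.dump(query, mode: :compat)
    puts JSON.dump(query)
    # Both print:
    # {"filters":[["uuid","=","zzzzz-4zz18-0123456789abcde"]],"limit":1}
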
index f19d47435ae979564c94cf9117109ff44fe5acdf..b02db7a6b63b5fad8c75ea5f107baa74a58d7151 100644 (file)
@@ -53,6 +53,7 @@ class ArvadosBase < ActiveRecord::Base
       'modified_by_client_uuid' => '203',
       'uuid' => '999',
     }
+    @loaded_attributes = {}
   end
 
   def self.columns
@@ -74,6 +75,14 @@ class ArvadosBase < ActiveRecord::Base
           @columns << column(k, :text)
           serialize k, coldef[:type].constantize
         end
+        define_method k do
+          unless new_record? or @loaded_attributes.include? k.to_s
+            Rails.logger.debug "BUG: accessing non-loaded attribute #{k}"
+            # Ideally this would raise instead:
+            # raise ActiveModel::MissingAttributeError, "missing attribute: #{k}"
+          end
+          super()
+        end
         @attribute_info[k] = coldef
       end
     end
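
The define_method/super() pair wraps each generated attribute reader so that reading a column that was never fetched can be flagged. The wrapping works because the plain readers live in an included module, the same layout ActiveRecord uses for its generated attribute methods; a minimal sketch:

    class Record
      # Readers live in an included module so the class-level override
      # below can still reach them with super().
      module Attrs
        attr_accessor :manifest_text
      end
      include Attrs

      def self.track_loads(name)
        define_method(name) do
          unless @loaded.include?(name.to_s)
            warn "BUG: accessing non-loaded attribute #{name}"
          end
          super()  # fall through to the plain reader in Attrs
        end
      end
      track_loads :manifest_text

      def initialize
        @loaded = []
      end

      def load_attribute(name, value)
        @loaded << name.to_s
        send("#{name}=", value)
      end
    end

    r = Record.new
    r.manifest_text                       # warns: non-loaded attribute
    r.load_attribute(:manifest_text, "")  # mark loaded and set
    r.manifest_text                       # silent now, returns ""
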
@@ -135,6 +144,10 @@ class ArvadosBase < ActiveRecord::Base
     ArvadosResourceList.new(self).select(*args)
   end
 
+  def self.distinct(*args)
+    ArvadosResourceList.new(self).distinct(*args)
+  end
+
   def self.eager(*args)
     ArvadosResourceList.new(self).eager(*args)
   end
@@ -167,8 +180,15 @@ class ArvadosBase < ActiveRecord::Base
   def save
     obdata = {}
     self.class.columns.each do |col|
-      unless self.send(col.name.to_sym).nil? and !self.changed.include?(col.name)
-          obdata[col.name.to_sym] = self.send(col.name.to_sym)
+      # Non-nil serialized values must be sent because we can't tell
+      # whether they've changed. Other than that, any given attribute
+      # is either unchanged (in which case there's no need to send its
+      # old value in the update/create command) or has been added to
+      # #changed by ActiveRecord's #attr= method.
+      if changed.include? col.name or
+          (self.class.serialized_attributes.include? col.name and
+           @loaded_attributes[col.name])
+        obdata[col.name.to_sym] = self.send col.name
       end
     end
     obdata.delete :id
@@ -194,6 +214,7 @@ class ArvadosBase < ActiveRecord::Base
       end
     end
 
+    changes_applied
     @new_record = false
 
     self
@@ -262,6 +283,7 @@ class ArvadosBase < ActiveRecord::Base
       hash = arvados_api_client.api(self.class, '/' + uuid_or_hash)
     end
     hash.each do |k,v|
+      @loaded_attributes[k.to_s] = true
       if self.respond_to?(k.to_s + '=')
         self.send(k.to_s + '=', v)
       else
@@ -276,6 +298,7 @@ class ArvadosBase < ActiveRecord::Base
       end
     end
     @all_links = nil
+    changes_applied
     @new_record = false
     self
   end
@@ -334,6 +357,10 @@ class ArvadosBase < ActiveRecord::Base
        (ArvadosBase.find(owner_uuid).writable_by.include? current_user.uuid rescue false)))) or false
   end
 
+  def deletable?
+    editable?
+  end
+
   # Array of strings that are the names of attributes that can be edited
   # with X-Editable.
   def editable_attributes
index d989715080fd1c244fecf557824a4a0dac883944..9f66d392270b88c50df561e2f6f91c6f2852b013 100644 (file)
@@ -16,6 +16,11 @@ class ArvadosResourceList
     self
   end
 
+  def distinct(bool=true)
+    @distinct = bool
+    self
+  end
+
   def limit(max_results)
     if not max_results.nil? and not max_results.is_a? Integer
       raise ArgumentError, "argument to limit() must be an Integer or nil"
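
Like the other modifiers in this class, distinct records its setting and returns self, which is what lets calls such as Collection.select(...).where(...).distinct() chain. The builder shape in isolation (QueryList is an illustrative stand-in):

    # Chainable query-builder sketch: each modifier stores state and
    # returns self so calls can be strung together.
    class QueryList
      def select(*cols) @select = cols; self end
      def distinct(bool = true) @distinct = bool; self end
      def limit(n) @limit = n; self end

      def api_params
        params = {}
        params[:select]   = @select   if @select
        params[:distinct] = @distinct if @distinct
        params[:limit]    = @limit    if @limit
        params
      end
    end

    p QueryList.new.select("portable_data_hash").distinct.limit(5).api_params
    # => {:select=>["portable_data_hash"], :distinct=>true, :limit=>5}
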
@@ -178,6 +183,7 @@ class ArvadosResourceList
     api_params[:select] = @select if @select
     api_params[:order] = @orderby_spec if @orderby_spec
     api_params[:filters] = @filters if @filters
+    api_params[:distinct] = @distinct if @distinct
 
 
     item_count = 0
index d84adea44f1832360b03ac5edc09a5cc10371a08..b0c2fa2f02d892d4e86578ff778045a3d8a7adcd 100644 (file)
@@ -8,6 +8,6 @@ class AuthorizedKey < ArvadosBase
   end
 
   def self.creatable?
-    current_user
+    false
   end
 end
index f575e20d4ea964355dda807bbafd5d21a33892e9..03d70b2897063435298707e3068dfa79ca16ff34 100644 (file)
@@ -1,3 +1,5 @@
+require "arvados/keep"
+
 class PipelineInstance < ArvadosBase
   attr_accessor :pipeline_template
 
@@ -81,4 +83,54 @@ class PipelineInstance < ArvadosBase
   def textile_attributes
     [ 'description' ]
   end
+
+  def job_uuids
+    components_map { |cspec| cspec[:job][:uuid] rescue nil }
+  end
+
+  def job_log_ids
+    components_map { |cspec| cspec[:job][:log] rescue nil }
+  end
+
+  def stderr_log_object_uuids
+    result = job_uuids.values.compact
+    result << uuid
+  end
+
+  def stderr_log_query(limit=nil)
+    query = Log.
+      where(event_type: "stderr",
+            object_uuid: stderr_log_object_uuids).
+      order("id DESC")
+    unless limit.nil?
+      query = query.limit(limit)
+    end
+    query
+  end
+
+  def stderr_log_lines(limit=2000)
+    stderr_log_query(limit).results.reverse.
+      flat_map { |log| log.properties[:text].split("\n") rescue [] }
+  end
+
+  def has_readable_logs?
+    log_pdhs, log_uuids = job_log_ids.values.compact.partition do |loc_s|
+      Keep::Locator.parse(loc_s)
+    end
+    if log_pdhs.any? and
+        Collection.where(portable_data_hash: log_pdhs).limit(1).results.any?
+      true
+    elsif log_uuids.any? and
+        Collection.where(uuid: log_uuids).limit(1).results.any?
+      true
+    else
+      stderr_log_query(1).results.any?
+    end
+  end
+
+  private
+
+  def components_map
+    Hash[components.map { |cname, cspec| [cname, yield(cspec)] }]
+  end
 end
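
components_map builds a component-name-to-value hash from whatever the block extracts, with the callers' inline rescue absorbing components that have no job yet. Stand-alone:

    # Sketch of the components_map pattern used by job_uuids/job_log_ids.
    def components_map(components)
      Hash[components.map { |cname, cspec| [cname, yield(cspec)] }]
    end

    components = {
      align: { job: { uuid: "zzzzz-8i9sb-000000000000001", log: "abc+123" } },
      plot:  {},  # component with no job yet
    }

    p components_map(components) { |cspec| cspec[:job][:uuid] rescue nil }
    # => {:align=>"zzzzz-8i9sb-000000000000001", :plot=>nil}
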
index b062dda8610ae7feed3494befbcd42acea37ec4e..7f9513a5a9c9e6e0010085329dfe4c7afaf77d1d 100644 (file)
@@ -1,6 +1,6 @@
 class Repository < ArvadosBase
   def self.creatable?
-    current_user and current_user.is_admin
+    false
   end
   def attributes_for_display
     super.reject { |x| x[0] == 'fetch_url' }
@@ -12,4 +12,103 @@ class Repository < ArvadosBase
       []
     end
   end
+
+  def show commit_sha1
+    refresh
+    run_git 'show', commit_sha1
+  end
+
+  def cat_file commit_sha1, path
+    refresh
+    run_git 'cat-file', 'blob', commit_sha1 + ':' + path
+  end
+
+  def ls_tree_lr commit_sha1
+    refresh
+    run_git 'ls-tree', '-l', '-r', commit_sha1
+  end
+
+  # ls_subtree returns a list of files under the given path at the
+  # specified commit. Results are returned as an array of file nodes,
+  # where each file node is an array [file mode, blob sha1, file size
+  # in bytes, path relative to the given directory]. If the path is
+  # not found, [] is returned.
+  def ls_subtree commit, path
+    path = path.chomp '/'
+    subtree = []
+    ls_tree_lr(commit).each_line do |line|
+      mode, type, sha1, size, filepath = line.split
+      next if type != 'blob'
+      if filepath[0,path.length] == path and
+          (path == '' or filepath[path.length] == '/')
+        subtree << [mode.to_i(8), sha1, size.to_i,
+                    filepath[path.length,filepath.length]]
+      end
+    end
+    subtree
+  end
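
git ls-tree -l -r emits one line per object (mode, type, sha1, size, then the path after a tab), and ls_subtree keeps the blob lines under the requested prefix. Parsing one canned line shows the node format:

    # Parse one line of `git ls-tree -l -r` output into the node format
    # ls_subtree returns: [mode, sha1, size, relative path].
    line = "100644 blob e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0\tdocs/README"
    mode, type, sha1, size, filepath = line.split
    if type == "blob"
      p [mode.to_i(8), sha1, size.to_i, filepath]
    end
    # => [33188, "e69de29bb2d1d6434b8b29ae775ad8c2e48c5391", 0, "docs/README"]
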
+
+  # http_fetch_url returns the first http:// or https:// url (if any)
+  # in the api response's clone_urls attribute.
+  def http_fetch_url
+    clone_urls.andand.select { |u| /^http/ =~ u }.first
+  end
+
+  protected
+
+  # refresh fetches the latest repository content into the local
+  # cache. It is a no-op if it has already been run on this object:
+  # this (pretty much) avoids doing more than one remote git operation
+  # per Workbench request.
+  def refresh
+    run_git 'fetch', http_fetch_url, '+*:*' unless @fresh
+    @fresh = true
+  end
+
+  # run_git sets up the ARVADOS_API_TOKEN environment variable,
+  # creates a local git directory for this repository if necessary,
+  # executes "git --git-dir localgitdir {args to run_git}", and
+  # returns the output. It raises GitCommandError if git exits
+  # non-zero.
+  def run_git *gitcmd
+    if not @workdir
+      workdir = File.expand_path uuid+'.git', Rails.configuration.repository_cache
+      if not File.exists? workdir
+        FileUtils.mkdir_p Rails.configuration.repository_cache
+        [['git', 'init', '--bare', workdir],
+        ].each do |cmd|
+          system *cmd
+          raise GitCommandError.new($?.to_s) unless $?.exitstatus == 0
+        end
+      end
+      @workdir = workdir
+    end
+    [['git', '--git-dir', @workdir, 'config', '--local',
+      "credential.#{http_fetch_url}.username", 'none'],
+     ['git', '--git-dir', @workdir, 'config', '--local',
+      "credential.#{http_fetch_url}.helper",
+      '!cred(){ cat >/dev/null; if [ "$1" = get ]; then echo password=$ARVADOS_API_TOKEN; fi; };cred'],
+     ['git', '--git-dir', @workdir, 'config', '--local',
+           'http.sslVerify',
+           Rails.configuration.arvados_insecure_https ? 'false' : 'true'],
+     ].each do |cmd|
+      system *cmd
+      raise GitCommandError.new($?.to_s) unless $?.exitstatus == 0
+    end
+    env = {}.
+      merge(ENV).
+      merge('ARVADOS_API_TOKEN' => Thread.current[:arvados_api_token])
+    cmd = ['git', '--git-dir', @workdir] + gitcmd
+    io = IO.popen(env, cmd, err: [:child, :out])
+    output = io.read
+    io.close
+    # "If [io] is opened by IO.popen, close sets $?." --ruby 2.2.1 docs
+    unless $?.exitstatus == 0
+      raise GitCommandError.new("`git #{gitcmd.join ' '}` #{$?}: #{output}")
+    end
+    output
+  end
+
+  class GitCommandError < StandardError
+  end
 end
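
The credential.helper setting is the notable trick: rather than a helper program on disk, it is an inline shell function that answers every get request with the API token taken from the environment, so the token is never written into the git config. A stand-alone sketch of the same setup (demo.git and the remote URL are illustrative):

    # Configure a bare repo to authenticate with a token from the
    # environment via an inline credential helper (no token on disk).
    require "fileutils"

    gitdir = File.expand_path("demo.git", Dir.pwd)
    system("git", "init", "--bare", gitdir) or abort "git init failed"

    url = "https://git.example.org/demo.git"   # illustrative remote
    helper = '!cred(){ cat >/dev/null; ' \
             'if [ "$1" = get ]; then echo password=$ARVADOS_API_TOKEN; fi; };cred'

    [["credential.#{url}.username", "none"],
     ["credential.#{url}.helper", helper]].each do |key, value|
      system("git", "--git-dir", gitdir, "config", "--local", key, value) \
        or abort "git config failed"
    end

    # A fetch would now run with the token injected at request time:
    #   env = ENV.to_h.merge("ARVADOS_API_TOKEN" => token)
    #   system(env, "git", "--git-dir", gitdir, "fetch", url, "+*:*")
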
index 7aaa4fe93951ca831add8b7bae6778e251b8b871..3b5b3083fc531b5b97c937bb326d3c32e377f202 100644 (file)
@@ -60,4 +60,7 @@ class User < ArvadosBase
                                                params))
   end
 
+  def deletable?
+    false
+  end
 end
index 3233d8d0062d71f6aa97538b046bd53ab2db74e8..b1e63da375885f7b2e34fb5e75b1326b05c1e568 100644 (file)
@@ -7,6 +7,10 @@
       </div>
 
       <div class="modal-body">
+        <% if params[:message].present? %>
+          <p> <%= params[:message] %> </p>
+        <% end %>
+
         <% project_filters, chooser_filters = (params[:filters] || []).partition do |attr, op, val|
              attr == "owner_uuid" and op == "="
            end %>
index 782a6af07996efe888489b33bf04c0145d76d9d3..9441a46c26d067f423188db099d240087f29a191 100644 (file)
@@ -5,10 +5,34 @@
   <ul class="nav nav-tabs" data-tab-counts-url="<%= url_for(action: :tab_counts) rescue '' %>">
     <% pane_list.each_with_index do |pane, i| %>
       <% pane_name = (pane.is_a?(Hash) ? pane[:name] : pane) %>
-      <li class="<%= 'active' if i==0 %>">
+
+      <% data_toggle = "tab" %>
+      <% tab_tooltip = "" %>
+      <% link_disabled = "" %>
+
+      <% if (pane_name == "Log") and !(ArvadosBase.find(@object.owner_uuid).writable_by.include?(current_user.andand.uuid) rescue nil)
+          if controller.model_class.to_s == 'Job'
+            if @object.log and !@object.log.empty?
+              log_collection = Collection.find? @object.log
+              if !log_collection
+                data_toggle = "disabled"
+                tab_tooltip = "Log data is not available"
+                link_disabled = "disabled"
+              end
+            end
+          elsif (controller.model_class.to_s == 'PipelineInstance' and
+                 !@object.has_readable_logs?)
+            data_toggle = "disabled"
+            tab_tooltip = "Log data is not available"
+            link_disabled = "disabled"
+          end
+        end
+      %>
+
+      <li class="<%= 'active' if i==0 %> <%= link_disabled %>" data-toggle="tooltip" data-placement="top" title="<%=tab_tooltip%>">
         <a href="#<%= pane_name %>"
            id="<%= pane_name %>-tab"
-           data-toggle="tab"
+           data-toggle="<%= data_toggle %>"
            data-tab-history=true
            data-tab-history-update-url=true
            >
index 6d6383e24ec02c6f460f69e738ace017d52082d1..6ece8606a839a1e974386cffdcb081cc217faadf 100644 (file)
@@ -1,4 +1,4 @@
-<% if object.editable? %>
+<% if object.deletable? %>
   <%= link_to({action: 'destroy', id: object.uuid}, method: :delete, remote: true, data: {confirm: "Really delete #{object.class_for_display.downcase} '#{object.friendly_link_name}'?"}) do %>
     <i class="glyphicon glyphicon-trash"></i>
   <% end %>
index 4de3c2330ed55407c6a01053906a3ddc37dc91b6..77b9d45f93587e1d1102582f62a0bda24e32fb0c 100644 (file)
@@ -1,3 +1,6 @@
+<li role="presentation" class="dropdown-header">
+  My projects
+</li>
 <li>
   <%= project_link_to.call({object: current_user, depth: 0}) do %>
     <span style="padding-left: 0">Home</span>
     <% end %>
   </li>
 <% end %>
-<li class="divider" />
-<li role="presentation" class="dropdown-header">
-  Projects shared with me
-</li>
-<% shared_project_tree.each do |pnode| %>
-  <% next if pnode[:object].class != Group %>
-  <li>
-    <%= project_link_to.call pnode do %>
-      <span style="padding-left: <%= pnode[:depth]-1 %>em"></span><i class="fa fa-fw fa-share-alt" style="color:#aaa"></i> <%= pnode[:object].name %>
-    <% end %>
-  </li>
-<% end %>
index a234e9f65696d94d0b11ff864b89949232dd9966..47e1dfdecff9454175ac6ead87e2766308e7140f 100644 (file)
@@ -11,6 +11,7 @@
      fn = String.new fn %>
 <%= check_box_tag 'uuids[]', object.uuid, false, {
       :class => 'persistent-selection',
+      :id => object.uuid,
       :friendly_type => object.class.name,
       :friendly_name => fn,
       :href => "#{url_for controller: object.class.name.tableize, action: 'show', id: object.uuid }",
diff --git a/apps/workbench/app/views/application/_show_home_button.html.erb b/apps/workbench/app/views/application/_show_home_button.html.erb
new file mode 100644 (file)
index 0000000..0acd384
--- /dev/null
@@ -0,0 +1,3 @@
+<% if (current_user.is_admin and controller.model_class == User) %>
+  <%= link_to 'Home', "/projects/#{object.uuid}" %>
+<% end %>
index 3fdbcd701961ad8c708fb21e90fb8d168173d993..8712c0a7c31a4d55c55dd848eccf95b6321f241f 100644 (file)
@@ -25,6 +25,9 @@
         <%= controller.model_class.attribute_info[attr.to_sym].andand[:column_heading] or attr.sub /_uuid/, '' %>
       </th>
       <% end %>
+      <th>
+        <!-- a column for user's home -->
+      </th>
       <th>
         <!-- a column for delete buttons -->
       </th>
@@ -40,9 +43,7 @@
         </td>
       <% end %>
       <td>
-        <% if (current_user.is_admin and current_user.uuid != object.uuid) or !current_user.is_admin %>
-          <%= render :partial => "show_object_button", :locals => {object: object, size: 'xs'} %>
-        <% end %>
+        <%= render :partial => "show_object_button", :locals => {object: object, size: 'xs'} %>
       </td>
 
       <% object.attributes_for_display.each do |attr, attrvalue| %>
@@ -55,6 +56,9 @@
         <% end %>
       </td>
       <% end %>
+      <td>
+        <%= render partial: 'show_home_button', locals: {object:object} %>
+      </td>
       <td>
         <%= render partial: 'delete_object_button', locals: {object:object} %>
       </td>
index 21d6521051c188cb2adc376c7e1a38b7feb11a3e..f22ba87a511ba7e2544e1b6125cd9444167dae4b 100644 (file)
@@ -48,6 +48,7 @@
 
   <%= link_to(send("choose_#{share_class}_path",
       title: "Share with #{share_class}",
+      message: "Only #{share_class} you are allowed to access are shown. Please contact your administrator if you need to be added to a specific group.",
       by_project: false,
       preview_pane: false,
       multiple: true,
index 31ff2e6e21a6e659ad8f75d14d2630e020b4d62b..398f248a39bc478b016b64537a310a6b578bb9b8 100644 (file)
   <% end %>
 <% end %>
 
-<%
-  # Display any flash messages in an alert. If there is any entry with "error" key, alert-danger is used.
-  flash_msg = ''
-  flash_msg_is_error = false
-  flash.each do |msg|
-    flash_msg_is_error ||= (msg[0]=='error')
-    flash_msg += ('<p class="contain-align-left">' + msg[1] + '</p>')
-  end
-  if flash_msg != ''
-%>
-<div class="flash-message alert <%= flash_msg_is_error ? 'alert-danger' : 'alert-warning' %>"><%=flash_msg.html_safe%></div>
+<% unless flash["error"].blank? %>
+<div class="flash-message alert alert-danger" role="alert">
+  <p class="contain-align-left"><%= flash["error"] %></p>
+</div>
+<% flash.delete("error") %>
+<% end %>
+
+<% unless flash.empty? %>
+<div class="flash-message alert alert-warning">
+  <% flash.each do |_, msg| %>
+  <p class="contain-align-left"><%= msg %></p>
+  <% end %>
+</div>
 <% end %>
index 8dcef33bf20444b95481cd9a415d03ef76cec5f3..e3c79f143b0de8fdf6d50dcc591ef05af6fef7b8 100644 (file)
@@ -1,13 +1,3 @@
-<script>
-function select_all_files() {
-  $("#collection_files :checkbox").filter(":visible").prop("checked", true).trigger("change");
-}
-
-function unselect_all_files() {
-  $("#collection_files :checkbox").filter(":visible").prop("checked", false).trigger("change");
-}
-</script>
-
 <%
   preview_selectable_container = ''
   preview_selectable = ''
@@ -38,8 +28,8 @@ function unselect_all_files() {
           </ul>
         </div>
         <div class="btn-group btn-group-sm">
-          <button id="select-all" type="button" class="btn btn-default" onClick="select_all_files()">Select all</button>
-          <button id="unselect-all" type="button" class="btn btn-default" onClick="unselect_all_files()">Unselect all</button>
+          <button id="select-all" type="button" class="btn btn-default" onClick="select_all_items()">Select all</button>
+          <button id="unselect-all" type="button" class="btn btn-default" onClick="unselect_all_items()">Unselect all</button>
         </div>
       </div>
       <div class="pull-right">
@@ -53,7 +43,7 @@ function unselect_all_files() {
   <% if file_tree.nil? or file_tree.empty? %>
     <p>This collection is empty.</p>
   <% else %>
-    <ul id="collection_files" class="collection_files <%=preview_selectable_container%>">
+    <ul id="collection_files" class="collection_files arv-selectable-items <%=preview_selectable_container%>">
     <% dirstack = [file_tree.first.first] %>
     <% file_tree.take(10000).each_with_index do |(dirname, filename, size), index| %>
       <% file_path = CollectionsHelper::file_path([dirname, filename]) %>
@@ -90,6 +80,7 @@ function unselect_all_files() {
                   :href => url_for(controller: 'collections', action: 'show_file',
                                    uuid: @object.portable_data_hash, file: file_path),
                   :title => "Include #{file_path} in your selections",
+                  :id => "#{@object.uuid}_file_#{index}",
                 } %>
             <span>&nbsp;</span>
             <% end %>
index 3d8032b4c214d975395bb894cac3432543985d6f..ddcdc12343f4b9b3c3ad9480249194eaa0793d76 100644 (file)
@@ -7,8 +7,15 @@
 <% end %>
 
 <% if @output_of.andand.any? %>
-  <p><i>This collection was the output of:</i><br />
-    <% pipelines = PipelineInstance.filter([["components", "like", "%#{@object.uuid}%"]]).each do |pipeline| %>
+  <% pipelines = PipelineInstance.limit(5).filter([["components", "like", "%#{@object.uuid}%"]]) %>
+  <%
+    message = "This collection was the output of the following:"
+    if pipelines.items_available > pipelines.results.size
+      message += ' (' + (pipelines.items_available - pipelines.results.size).to_s + ' more results are not shown)'
+    end
+  %>
+  <p><i><%= message %></i><br />
+    <% pipelines.each do |pipeline| %>
       <% pipeline.components.each do |cname, c| %>
         <% if c[:output_uuid] == @object.uuid %>
           <b><%= cname %></b> component of <b><%= link_to_if_arvados_object(pipeline, friendly_name: true) %></b>
@@ -25,7 +32,7 @@
 <% if @log_of.andand.any? %>
   <p><i>This collection contains log messages from:</i><br />
     <%= render_arvados_object_list_start(@log_of, 'Show all jobs',
-                                         jobs_path(filters: [['log', '=', @object.uuid]].to_json)) do |job| %>
+                                         jobs_path(filters: [['log', '=', @object.portable_data_hash]].to_json)) do |job| %>
       <%= link_to_if_arvados_object(job, friendly_name: true) %><br />
     <% end %>
   </p>
index 7c4abb080eb8b9a393308dfc62c3ad18de928eaf..c93b6acbdaed631bfa4a89381cfcc1a02bfa9e1e 100644 (file)
@@ -1,3 +1,9 @@
+<%
+  message = "The following collections have this content:"
+  if @same_pdh.items_available > @same_pdh.results.size
+    message += ' (' + (@same_pdh.items_available - @same_pdh.results.size).to_s + ' more results are not shown)'
+  end
+%>
 <div class="row">
   <div class="col-md-10 col-md-offset-1">
     <div class="panel panel-info">
@@ -5,7 +11,7 @@
         <h3 class="panel-title"><%= params["uuid"] %></h3>
       </div>
       <div class="panel-body">
-        <p><i>The following collections have this content:</i></p>
+        <p><i><%= message %></i></p>
         <% @same_pdh.sort { |a,b| b.created_at <=> a.created_at }.each do |c| %>
           <div class="row">
             <div class="col-md-8">
diff --git a/apps/workbench/app/views/getting_started/_getting_started_popup.html.erb b/apps/workbench/app/views/getting_started/_getting_started_popup.html.erb
new file mode 100644 (file)
index 0000000..0db0567
--- /dev/null
@@ -0,0 +1,179 @@
+<style>
+div.figure {
+}
+.style_image1 {
+  border: 10px solid #ddd;
+  display: block;
+  margin-left: auto;
+  margin-right: auto;
+}
+.style_image2 {
+  border: 10px solid #ddd;
+  display: block;
+  margin-left: 1em;
+}
+div.figure p {
+  text-align: center;
+  font-style: italic;
+  text-indent: 0;
+  border-top:-0.3em;
+}
+</style>
+
+<div id="getting-started-modal-window" class="modal">
+  <div class="modal-dialog modal-with-loading-spinner" style="width: 50em">
+    <div class="modal-content">
+      <div class="modal-header" style="text-align: center">
+        <button type="button" class="close" data-dismiss="modal" aria-hidden="true">x</button>
+        <div>
+          <div class="col-sm-8"><h4 class="modal-title" style="text-align: right">Getting Started with Arvados</h4></div>  <%#Todo: center instead of right%>
+          <div class="spinner spinner-32px spinner-h-center col-sm-1" hidden="true"></div>
+        </div>
+        <br/>
+      </div>
+
+      <%#First Page%>
+      <div class="modal-body" style="height: 40em; overflow-y: scroll">
+        <div style="margin-top: -0.5em; margin-left: 0.5em;">
+          <p><div style="font-size: 150%;">Welcome!</div></p>
+          <p>
+            What you're looking at right now is <b>Workbench</b>, the graphical interface to the Arvados system.
+          </p><p>
+            <div class="figure">
+              <p> <%= image_tag "pipeline-running.gif", :class => "style_image1" %></p> <%#Todo: shorter gif%>
+              <p>Running the Pathomap pipeline in Arvados.</p>
+            </div>
+          </p><p>
+            Click the <span class="btn btn-sm btn-primary">Next &gt;</span> button below for a speed tour of Arvados.
+          </p><p style="margin-top:2em;">
+            <em><strong>Note:</strong> You can always come back to this Getting Started guide by clicking the <span class="fa fa-lg fa-question-circle"></span> in the upper-right corner.</em>
+          </p>
+        </div>
+      </div>
+
+      <%#Page Two%>
+      <div class="modal-body" style="height: 40em; overflow-y: scroll">
+        <div style="margin-top: -0.5em; margin-left: 0.5em;">
+          <p><div style="font-size: 150%;">Take It for a Spin</div></p>
+          <p>
+            Run your first pipeline in 3 quick steps:
+          </p>
+          <div style="display: block; margin: 0em 2em; padding-top: 1em; padding-bottom: 1em; border: thin dashed silver;">
+            <p style="margin-left: 1em;">
+              <em>First, <a href="/users/welcome">log in or register</a> with any Google account if you haven't already.</em>
+            </p><p>
+              <ol><li> Go to the <span class="btn btn-sm btn-default"><i class="fa fa-lg fa-fw fa-dashboard"></i> Dashboard</span> &gt; <span class="btn btn-sm btn-primary"><i class="fa fa-fw fa-gear"></i> Run a pipeline...</span>
+                  <p style="margin-top:1em;">
+                    <%= image_tag "mouse-move.gif", :class => "style_image2" %>
+                  </p>
+                </li>
+                <li> <span class="btn btn-sm btn-default"><i class="fa fa-fw fa-gear"></i>Mason Lab -- Ancestry Mapper (public)</span> &gt; <span class="btn btn-sm btn-primary">Next: choose inputs <i class="fa fa-fw fa-arrow-circle-right"></i></span></li><br>
+                <li> <span class="btn btn-sm btn-primary">Run <i class="fa fa-fw fa-play"></i></span></li>
+              </ol>
+          </p></div>
+          <p style="margin-top:1em;">
+            <i class="fa fa-flag fa-flip-horizontal" style="color: green"></i> <i class="fa fa-child"></i>
+            <strong>Voila!</strong> <i class="fa fa-child"></i> <i class="fa fa-flag" style="color: green"></i>
+            Your pipeline is now spooling up and getting ready to run!
+          </p><p>
+            Go ahead, try it for yourself right now. <span class="glyphicon glyphicon-thumbs-up"></span>
+          </p><p>
+            Or click <span class="btn btn-sm btn-primary">Next &gt;</span> below to keep reading!
+          </p>
+        </div>
+      </div>
+
+      <%#Page Three%>
+      <div class="modal-body" style="height: 40em; overflow-y: scroll">
+        <div style="margin-top: -0.5em; margin-left: 0.5em;">
+          <p><div style="font-size: 150%;">Three Useful Terms</div></p>
+          <ol>
+            <li>
+              <strong>Pipeline</strong> — A re-usable series of analysis steps.
+              <ul>
+                <li>
+                  Also known as a “workflow” in other systems
+                </li><li>
+                  A list of well-documented public pipelines can be found in the upper right corner by clicking the <span class="fa fa-lg fa-question-circle"></span> &gt; <a href="<%= Rails.configuration.arvados_public_data_doc_url %>">Public Pipelines and Datasets</a>
+                </li><li>
+                  Pro-tip: A Pipeline contains Jobs which contain Tasks
+                </li><li>
+                  Pipelines can only be shared within a project
+                </li>
+              </ul>
+            </li>
+
+            <li>
+              <strong>Collection </strong>— Like a folder, but better.
+              <ul>
+                <li>
+                  Upload data right in your browser
+                </li><li>
+                  Better than a folder?
+                  <ul><li>
+                      Collections contain the content-address of the data instead of the data itself
+                    </li><li>
+                      Sets of data can be flexibly defined and re-defined without duplicating data
+                    </li>
+                </ul></li><li>
+                  Collections can be shared using the "Sharing and Permissions"  &gt; "Share" button
+                </li>
+              </ul>
+            </li>
+
+            <li>
+              <strong>Projects </strong>— Contain pipeline templates, pipeline instances (individual runs of a pipeline), and collections.
+              <ul><li>
+                  The most useful one is your default "Home" project, under Projects &gt; Home
+                </li><li>
+                  Projects can be shared using the "sharing" tab
+                </li>
+              </ul>
+            </li>
+          </ol>
+
+        </div>
+      </div>
+
+      <%#Page Four%>
+      <div class="modal-body" style="height: 40em; overflow-y: scroll">
+        <div style="margin-top: -0.5em; margin-left: 0.5em;">
+          <p><div style="font-size: 150%;">Six Reasons Arvados is Awesome</div></p>
+          <p>
+            This guide, and in fact all of Workbench, is just a teaser for the full power of Arvados:
+          </p>
+          <ol>
+            <li>
+              <strong>Reproducible analyses</strong>: Enough said.
+            </li><li>
+              <strong>Data provenance</strong>: Every file in Arvados can tell you where it came from.
+            </li><li>
+              <strong>Serious scaling</strong>: Need 500 GB of space? 200 compute hours? Arvados scales and parallelizes your work for you intelligently.
+            </li><li>
+              <strong>Share pipelines or data</strong>: Easily publish your work to the world, just like <a href="http://www.pathomap.org/2015/04/08/run-the-pathomap-human-ancestry-pipeline-on-arvados/">the Pathomap team did</a>.
+            </li><li>
+              <strong>Use existing pipelines</strong>: Use best-practices pipelines on your own data with the click of a button.
+            </li><li>
+              <strong>Open-source</strong>: Arvados is completely open-source. Check out our <a href="http://arvados.org">developer site</a>.
+            </li>
+          </ol>
+          <p style="margin-top: 1em;">
+            Want to use the command line, or hungry to learn more? Check out the User Guide at <a href="http://doc.arvados.org/">doc.arvados.org</a>.
+          </p><p>
+            Still have questions? Head over to <a href="http://doc.arvados.org/">doc.arvados.org</a> to find mailing-list and contact info for the Arvados community.
+          </p><p>
+            That's all, folks! Click the "x" up top to leave this guide.
+          </p>
+        </div>
+      </div>
+
+      <div class="modal-footer">
+        <div style="text-align:center">
+          <button class="btn btn-default pager-prev"><i class="fa fa-fw fa-chevron-left"></i><span style="font-weight: bold;"> Prev</span></button>
+          <button class="btn btn-default pager-next"><span style="font-weight: bold;">Next </span><i class="fa fa-fw fa-chevron-right"></i></button>
+          <div class="pager-count pull-right"><span style="margin:5px"></span></div>
+        </div>
+      </div>
+    </div>
+  </div>
+</div>
index cdc47c17169401995a24a77c9e2a8cdcbc46d72f..5889bee57a85d586336b5e5d1cb05a311a76ff99 100644 (file)
   <meta name="arv-websocket-url" content="<%=$arvados_api_client.discovery[:websocketUrl]%>?api_token=<%=Thread.current[:arvados_api_token]%>">
   <% end %>
   <meta name="robots" content="NOINDEX, NOFOLLOW">
+
+  <%# Feature #5645: Add open graph meta tags to generate this page's
+      social graph that search engines can use. http://ogp.me/ %>
+  <meta property="og:type" content="article" />
+  <meta property="og:url" content="<%= request.url %>" />
+  <meta property="og:site_name" content="<%= Rails.configuration.site_name %>" />
+  <% if @object %>
+    <% if @object.respond_to?(:name) and @object.name.present? %>
+      <meta property="og:title" content="<%= @object.name%>" />
+    <% end %>
+    <% if (@object.respond_to?(:description) rescue nil) and @object.description.present? %>
+      <meta property="og:description" content="<%= @object.description%>" />
+    <% end %>
+  <% end %>
+  <%# Done adding open graph meta tags %>
+
   <%= stylesheet_link_tag    "application", :media => "all" %>
   <%= javascript_include_tag "application" %>
   <%= csrf_meta_tags %>
@@ -37,9 +53,9 @@
     @media (max-width: 979px) { body { padding-top: 0; } }
 
     @media (max-width: 767px) {
-    .breadcrumbs {
-    display: none;
-    }
+      .breadcrumbs {
+        padding-top: 0;
+      }
     }
   </style>
   <link href="//netdna.bootstrapcdn.com/font-awesome/4.1.0/css/font-awesome.css" rel="stylesheet">
index fbbd0fac26e32793d50f095973933750a52cbdef..22ccc2f91dd69a7c88fd66bc10e7f817de8bda1d 100644 (file)
@@ -34,7 +34,7 @@
                           action_name: 'Show',
                           action_href: url_for(controller: :actions, action: :show),
                           action_method: 'get',
-                          action_data: {selection_param: 'uuid', success: 'redirect-to-created-object', copy_from_search_box: true}.to_json)
+                          action_data: {selection_param: 'uuid', success: 'redirect-to-created-object'}.to_json)
                          %>">
                 <div class="input-group" style="width: 220px">
                   <input type="text" class="form-control" placeholder="search">
             <li class="dropdown notification-menu">
               <a href="#" class="dropdown-toggle" data-toggle="dropdown" id="notifications-menu">
                 <span class="badge badge-alert notification-count"><%= user_notifications.length if user_notifications.any? %></span>
-                <%= current_user.email %> <span class="caret"></span>
+                <span class="fa fa-lg fa-user"></span>
+                <span class="caret"></span>
               </a>
               <ul class="dropdown-menu" role="menu">
+                <li role="presentation" class="dropdown-header">
+                  <%= current_user.email %>
+                </li>
                 <% if current_user.is_active %>
-                <li role="menuitem"><a href="/manage_account" role="menuitem"><i class="fa fa-key fa-fw"></i> Manage account</a></li>
+                <li role="menuitem"><a href="/projects/<%=current_user.uuid%>" role="menuitem"><i class="fa fa-lg fa-home fa-fw"></i> Home project </a></li>
+
+                <li role="menuitem">
+                  <%= link_to virtual_machines_user_path(current_user), role: 'menu-item' do %>
+                    <i class="fa fa-lg fa-terminal fa-fw"></i> Virtual machines
+                  <% end %>
+                </li>
+                <li role="menuitem">
+                  <%= link_to repositories_user_path(current_user), role: 'menu-item' do %>
+                    <i class="fa fa-lg fa-code-fork fa-fw"></i> Repositories
+                  <% end %>
+                </li>
+                <li role="menuitem"><a href="/current_token" role="menuitem"><i class="fa fa-lg fa-ticket fa-fw"></i> Current token</a></li>
+                <li role="menuitem">
+                  <%= link_to ssh_keys_user_path(current_user), role: 'menu-item' do %>
+                    <i class="fa fa-lg fa-key fa-fw"></i> SSH keys
+                  <% end %>
+                </li>
+
                 <% if Rails.configuration.user_profile_form_fields %>
-                  <li role="menuitem"><a href="/users/<%=current_user.uuid%>/profile" role="menuitem"><i class="fa fa-key fa-fw"></i> Manage profile</a></li>
+                  <li role="menuitem"><a href="/users/<%=current_user.uuid%>/profile" role="menuitem"><i class="fa fa-lg fa-user fa-fw"></i> Manage profile</a></li>
                 <% end %>
                 <% end %>
-                <li role="menuitem"><a href="<%= logout_path %>" role="menuitem"><i class="fa fa-sign-out fa-fw"></i> Log out</a></li>
+                <li role="presentation" class="divider"></li>
+                <li role="menuitem"><a href="<%= logout_path %>" role="menuitem"><i class="fa fa-lg fa-sign-out fa-fw"></i> Log out</a></li>
                 <% if user_notifications.any? %>
                   <li role="presentation" class="divider"></li>
                   <% user_notifications.each_with_index do |n, i| %>
               <li class="dropdown">
                 <a href="#" class="dropdown-toggle" data-toggle="dropdown" id="system-menu">
                   <span class="fa fa-lg fa-gear"></span>
+                  <span class="caret"></span>
                 </a>
                 <ul class="dropdown-menu" role="menu">
                   <li role="presentation" class="dropdown-header">
-                    Settings
+                    Admin Settings
                   </li>
                   <li role="menuitem"><a href="/repositories">
                       <i class="fa fa-lg fa-code-fork fa-fw"></i> Repositories
               </li>
             <% end %>
           <% else %>
-            <li><a href="<%= arvados_api_client.arvados_login_url(return_to: root_url) %>">Log in</a></li>
+            <% if Rails.configuration.anonymous_user_token and Rails.configuration.enable_public_projects_page %>
+              <li><%= link_to 'Browse public projects', "/projects/public" %></li>
+            <% end %>
+            <li class="dropdown hover-dropdown login-menu">
+              <a href="<%= arvados_api_client.arvados_login_url(return_to: root_url) %>">Log in</a>
+              <ul class="dropdown-menu">
+                <li>
+                  <a href="<%= arvados_api_client.arvados_login_url(return_to: root_url) %>">
+                    <span class="fa fa-lg fa-sign-in"></span>
+                    <p style="margin-left: 1.6em; margin-top: -1.35em; margin-bottom: 0em; margin-right: 0.5em;">Log in or register with<br/>any Google account</p>
+                  </a>
+                </li>
+              </ul>
+            </li>
           <% end %>
 
           <li class="dropdown help-menu">
             <a href="#" class="dropdown-toggle" data-toggle="dropdown" id="arv-help">
               <span class="fa fa-lg fa-question-circle"></span>
+              <span class="caret"></span>
             </a>
             <ul class="dropdown-menu">
+              <li role="presentation" class="dropdown-header">
+                Help
+              </li>
+              <li>
+              <%= link_to raw('<i class="fa fa-fw fa-info"></i> Getting Started ...'), "#",
+                   {'data-toggle' => "modal", 'data-target' => '#getting-started-modal-window'}  %>
+              </li>
+              <% if Rails.configuration.arvados_public_data_doc_url %>
+                <li><%= link_to raw('<i class="fa fa-book fa-fw"></i> Public Pipelines and Datasets'), "#{Rails.configuration.arvados_public_data_doc_url}", target: "_blank" %></li>
+              <% end %>
               <li><%= link_to raw('<i class="fa fa-book fa-fw"></i> Tutorials and User guide'), "#{Rails.configuration.arvados_docsite}/user", target: "_blank" %></li>
               <li><%= link_to raw('<i class="fa fa-book fa-fw"></i> API Reference'), "#{Rails.configuration.arvados_docsite}/api", target: "_blank" %></li>
               <li><%= link_to raw('<i class="fa fa-book fa-fw"></i> SDK Reference'), "#{Rails.configuration.arvados_docsite}/sdk", target: "_blank" %></li>
               <span class="caret"></span>
             </a>
             <ul class="dropdown-menu" style="min-width: 20em" role="menu">
-              <li>
-                <%= link_to projects_path(options: {ensure_unique_name: true}), method: :post, class: 'btn btn-xs btn-default pull-right' do %>
-                  <i class="fa fa-plus"></i> Add a new project
+              <li role="menuitem">
+                  <%= link_to(
+                        url_for(
+                          action: 'choose',
+                          controller: 'search',
+                          filters: [['uuid', 'is_a', 'arvados#group']].to_json,
+                          title: 'Search',
+                          action_name: 'Show',
+                          action_href: url_for(controller: :actions, action: :show),
+                          action_method: 'get',
+                          action_data: {selection_param: 'uuid', success: 'redirect-to-created-object'}.to_json),
+                        { remote: true, method: 'get', title: "Search" }) do %>
+                    <i class="glyphicon fa-fw glyphicon-search"></i> Search all projects ...
+                  <% end %>
+               </li>
+              <% if Rails.configuration.anonymous_user_token and Rails.configuration.enable_public_projects_page %>
+                <li role="menuitem"><a href="/projects/public" role="menuitem"><i class="fa fa-fw fa-list"></i> Browse public projects </a>
+                </li>
+              <% end %>
+              <li role="menuitem">
+                <%= link_to projects_path(options: {ensure_unique_name: true}), role: 'menu-item', method: :post do %>
+                  <i class="fa fa-fw fa-plus"></i> Add a new project
                 <% end %>
               </li>
+              <li role="presentation" class="divider"></li>
               <%= render partial: "projects_tree_menu", locals: {
                   :project_link_to => Proc.new do |pnode, &block|
                     link_to(project_path(pnode[:object].uuid),
           <% end %>
         </ul>
       </nav>
+    <% elsif !current_user %>   <%# anonymous %>
+      <% if (@name_link or @object) and (project_breadcrumbs.any?) %>
+        <nav class="navbar navbar-default breadcrumbs" role="navigation">
+          <ul class="nav navbar-nav navbar-left">
+            <li>
+              <a href="/projects/public">Public Projects</a>
+            </li>
+            <% project_breadcrumbs.each do |p| %>
+              <li class="nav-separator">
+                <i class="fa fa-lg fa-angle-double-right"></i>
+              </li>
+              <li>
+                <%= link_to(p.name, project_path(p.uuid), data: {object_uuid: p.uuid, name: 'name'}) %>
+              </li>
+            <% end %>
+          </ul>
+        </nav>
+      <% end %>
     <% end %>
 
     <%= render partial: 'browser_unsupported' %><%# requires JS support below %>
+    <%= render partial: 'getting_started/getting_started_popup' %>
 
     <div id="page-wrapper">
       <%= yield %>
 <div class="modal-container"></div>
 <div id="report-issue-modal-window"></div>
 <script src="/browser_unsupported.js"></script>
+
+<%  if current_user and !current_user.prefs[:getting_started_shown] and
+       !request.url.include?("/profile") and
+       !request.url.include?("/user_agreements") and
+       !request.url.include?("/inactive")%>
+  <script>
+    $("#getting-started-modal-window").modal('show');
+  </script>
+  <%
+    prefs = current_user.prefs
+    prefs[:getting_started_shown] = Time.now
+    current_user.update_attributes prefs: prefs.to_json
+  %>
+<% end %>
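One design note on the guide-tracking snippet above: it stores a timestamp rather than a boolean, so the preference records when the guide was first shown while still reading as truthy in the `if` check. A minimal sketch of the pattern, assuming `prefs` behaves like a Hash as the ERB implies:

```ruby
# Show-once pattern: any later check of prefs[:getting_started_shown]
# is truthy, so the modal is never auto-opened again.
prefs = current_user.prefs
prefs[:getting_started_shown] ||= Time.now
current_user.update_attributes prefs: prefs.to_json
```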
index 989db3daff21d66de26ca13477bd2f5520c0de0a..382b98841f4b87637c7ca1981b273cf1200dffc5 100644 (file)
@@ -1,6 +1,7 @@
    <%= image_tag "dax.png", class: "dax" %>
     <div>
       Hi, I noticed that you have not yet set up an SSH public key for use with Arvados.
-      <%= link_to "Click here to set up an SSH public key for use with Arvados.",
-      "/manage_account", style: "font-weight: bold" %>
+      <%= link_to ssh_keys_user_path(current_user) do %>
+        <b>Click here to set up an SSH public key for use with Arvados.</b>
+      <%end%>
     </div>
index 018d49f5e4a10f5ed9ab81385d8a2416b0b3a0b1..63a2371a1b3dd2dc398c9b672311bd606d64bdf7 100644 (file)
           <div class="col-md-3">
             <% if current_job[:started_at] %>
               <% walltime = ((if current_job[:finished_at] then current_job[:finished_at] else Time.now() end) - current_job[:started_at]) %>
-              <% cputime = tasks.map { |task|
-                   if task.started_at and task.job_uuid == current_job[:uuid]
-                     (if task.finished_at then task.finished_at else Time.now() end) - task.started_at
-                   else
-                     0
-                   end
-                 }.reduce(:+) || 0 %>
-              <%= render_runtime(walltime, false, false) %>
-              <% if cputime > 0 %> / <%= render_runtime(cputime, false, false) %> (<%= (cputime/walltime).round(1) %>&Cross;)<% end %>
+              <% cputime = (current_job[:runtime_constraints].andand[:min_nodes] || 1) *
+                           ((current_job[:finished_at] || Time.now()) - current_job[:started_at]) %>
+              <%= render_runtime(walltime, false) %>
+              <% if cputime > 0 %> / <%= render_runtime(cputime, false) %> (<%= (cputime/walltime).round(1) %>&Cross;)<% end %>
             <% end %>
           </div>
           <% end %>
@@ -40,7 +35,7 @@
             <%# column offset 5 %>
             <div class="col-md-6">
               <% queuetime = Time.now - Time.parse(current_job[:created_at].to_s) %>
-              Queued for <%= render_runtime(queuetime, true) %>.
+              Queued for <%= render_runtime(queuetime, false) %>.
               <% begin %>
                 <% if current_job[:queue_position] == 0 %>
                   This job is next in the queue to run.
@@ -66,9 +61,9 @@
             <%# column offset 8 %>
             <div class="col-md-4 text-overflow-ellipsis">
               <% if pj[:output_uuid] %>
-                <%= link_to_if_arvados_object pj[:output_uuid], friendly_name: true %>
+                <%= link_to_arvados_object_if_readable(pj[:output_uuid], 'Output data not available', friendly_name: true) %>
               <% elsif current_job[:output] %>
-                <%= link_to_if_arvados_object current_job[:output], link_text: "Output of #{pj[:name]}" %>
+                <%= link_to_arvados_object_if_readable(current_job[:output], 'Output data not available', link_text: "Output of #{pj[:name]}") %>
               <% else %>
                 No output.
               <% end %>
         <div class="row">
           <div class="col-md-6">
             <table>
+              <% # link to repo tree/file only if the repo is readable
+                 # and the commit is a sha1...
+                 repo =
+                 (/^[0-9a-f]{40}$/ =~ current_component[:script_version] and
+                 Repository.where(name: current_component[:repository]).first)
+
+                 # ...and the api server provides an http:// or https:// url
+                 repo = nil unless repo.andand.http_fetch_url
+                 %>
               <% [:script, :repository, :script_version, :supplied_script_version, :nondeterministic].each do |k| %>
                 <tr>
                   <td style="padding-right: 1em">
                   <td>
                     <% if current_component[k].nil? %>
                       (none)
+                    <% elsif repo and k == :repository %>
+                      <%= link_to current_component[k], show_repository_tree_path(id: repo.uuid, commit: current_component[:script_version], path: '/') %>
+                    <% elsif repo and k == :script %>
+                      <%= link_to current_component[k], show_repository_blob_path(id: repo.uuid, commit: current_component[:script_version], path: 'crunch_scripts/'+current_component[:script]) %>
+                    <% elsif repo and k == :script_version %>
+                      <%= link_to current_component[k], show_repository_commit_path(id: repo.uuid, commit: current_component[:script_version]) %>
                     <% else %>
                       <%= current_component[k] %>
                     <% end %>
                     docker_image_locator:
                   </td>
                   <td>
-                    <%= link_to_if_arvados_object current_component[:docker_image_locator], friendly_name: true %>
+                    <%= link_to_arvados_object_if_readable(current_component[:docker_image_locator],
+                      current_component[:docker_image_locator], friendly_name: true) %>
                   </td>
                 </tr>
               <% else %>
                   </td>
                   <td>
                     <% if k == :uuid %>
-                      <%= link_to_if_arvados_object current_component[k], link_text: current_component[k] %>
+                      <%= link_to_arvados_object_if_readable(current_component[k], current_component[k], link_text: current_component[k]) %>
                     <% elsif k.to_s.end_with? 'uuid' %>
-                      <%= link_to_if_arvados_object current_component[k], friendly_name: true %>
+                      <%= link_to_arvados_object_if_readable(current_component[k], current_component[k], friendly_name: true) %>
                     <% elsif k.to_s.end_with? '_at' %>
                       <%= render_localized_date(current_component[k]) %>
                     <% else %>
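The first hunk in this file replaces the old per-task CPU-time sum with a node-allocation figure: a job is charged for every node it requested, for its whole wall-clock duration. A minimal sketch of that formula, using a hypothetical helper name and the same symbol-keyed job hash the partial uses:

```ruby
# Node allocation time = requested nodes * elapsed wall-clock time.
# Falls back to one node when runtime_constraints is absent, as the view does.
def node_allocation_time(job, now: Time.now)
  return 0 unless job[:started_at]
  nodes = (job[:runtime_constraints] || {})[:min_nodes] || 1
  nodes * ((job[:finished_at] || now) - job[:started_at])
end
```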
index 7735997748389e1d3fa68713dc53c39626b961bf..dae57aa0e85ebbe3ccf7ce183a185dcc860b3f0b 100644 (file)
@@ -13,6 +13,8 @@
 
 <% else %>
   <%# state is either New or Ready %>
+  <%= render_unreadable_inputs_present %>
+
+  <p><i>Here are all of the pipeline's components (jobs that will need to run in order to complete the pipeline). If you know what you're doing (or you're experimenting), you can modify these parameters before starting the pipeline. Usually, you only need to edit the settings presented on the "Inputs" tab above.</i></p>
 
   <%= render_pipeline_components("editable", :json, editable: true) %>
index d99ac23ab8c969f08f50cb3132ac610502762e0a..566e3d771e12796b8d8ebb7e273e34fe499def33 100644 (file)
@@ -20,7 +20,6 @@
   </p>
 <% end %>
 
-<% tasks = JobTask.filter([['job_uuid', 'in', job_uuids]]).results %>
 <% runningtime = determine_wallclock_runtime(pipeline_jobs.map {|j| j[:job]}.compact) %>
 
 <p>
@@ -42,9 +41,9 @@
                   end %>
 
     <%= if walltime > runningtime
-          render_runtime(walltime, true, false)
+          render_runtime(walltime, false)
         else
-          render_runtime(runningtime, true, false)
+          render_runtime(runningtime, false)
         end %><% if @object.finished_at %> at <%= render_localized_date(@object.finished_at) %><% end %>.
     <% else %>
       This pipeline is <%= if @object.state.start_with? 'Running' then 'active' else @object.state.downcase end %>.
       ran
     <% end %>
     for
-    <% cputime = tasks.map { |task|
-         if task.started_at
-           (if task.finished_at then task.finished_at else Time.now() end) - task.started_at
-           else
-         0
-       end
+    <%
+        cputime = pipeline_jobs.map { |j|
+        if j[:job][:started_at]
+          (j[:job][:runtime_constraints].andand[:min_nodes] || 1) * ((j[:job][:finished_at] || Time.now()) - j[:job][:started_at])
+        else
+          0
+        end
        }.reduce(:+) || 0 %>
-    <%= render_runtime(runningtime, true, false) %><% if (walltime - runningtime) > 0 %>
-      (<%= render_runtime(walltime - runningtime, true, false) %> queued)<% end %><% if cputime == 0 %>.<% else %>
+    <%= render_runtime(runningtime, false) %><% if (walltime - runningtime) > 0 %>
+      (<%= render_runtime(walltime - runningtime, false) %> queued)<% end %><% if cputime == 0 %>.<% else %>
       and used
-    <%= render_runtime(cputime, true, false) %>
-    of CPU time (<%= (cputime/runningtime).round(1) %>&Cross; scaling).
+    <%= render_runtime(cputime, false) %>
+    of node allocation time (<%= (cputime/runningtime).round(1) %>&Cross; scaling).
     <% end %>
 </p>
 
 <%# Components %>
 
+<%
+  job_uuids = pipeline_jobs.collect {|j| j[:job][:uuid]}.compact
+  if job_uuids.any?
+    resource_class = resource_class_for_uuid(job_uuids.first, friendly_name: true)
+    preload_objects_for_dataclass resource_class, job_uuids
+  end
+
+  job_collections = pipeline_jobs.collect {|j| j[:job][:output]}.compact
+  job_collections.concat pipeline_jobs.collect {|j| j[:job][:docker_image_locator]}.uniq.compact
+  job_collections_pdhs = job_collections.select {|x| !(m = CollectionsHelper.match(x)).nil?}.uniq.compact
+  job_collections_uuids = job_collections - job_collections_pdhs
+  preload_collections_for_objects job_collections_uuids if job_collections_uuids.any?
+  preload_for_pdhs job_collections_pdhs if job_collections_pdhs.any?
+%>
+
 <% pipeline_jobs.each_with_index do |pj, i| %>
-  <%= render partial: 'running_component', locals: {tasks: tasks, pj: pj, i: i, expanded: false} %>
+  <%= render partial: 'running_component', locals: {pj: pj, i: i, expanded: false} %>
 <% end %>
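The preload block above avoids one API round-trip per component row: collection references are gathered up front, split into portable data hashes (PDHs) and UUIDs, and fetched in bulk through the matching helper. Roughly, with the helper names the partial itself uses:

```ruby
# Gather every collection reference once, then batch-load by reference kind.
refs = pipeline_jobs.flat_map { |j| [j[:job][:output], j[:job][:docker_image_locator]] }.compact.uniq
pdhs, uuids = refs.partition { |x| CollectionsHelper.match(x) }  # PDH-shaped vs UUID
preload_for_pdhs pdhs if pdhs.any?
preload_collections_for_objects uuids if uuids.any?
```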
index e6b7ef20347cf2a5c25af1b0304da4554a981b7a..567ca72504146667b3f7c04c4be70775466426bf 100644 (file)
@@ -32,6 +32,8 @@
 <% if n_inputs == 0 %>
   <p>This pipeline does not need any further inputs specified. You can start it by clicking the "Run" button whenever you're ready. (It's not too late to change existing settings, though.)</p>
 <% else %>
+  <%= render_unreadable_inputs_present %>
+
   <p><i>Provide <%= n_inputs > 1 ? 'values' : 'a value' %> for the following <%= n_inputs > 1 ? 'parameters' : 'parameter' %>, then click the "Run" button to start the pipeline.</i></p>
   <% if @object.editable? %>
     <%= content_for :pi_input_form %>
index bb756a08274044fd867ca3cd18c5e10946bb5a5e..187dce7bd244e44468edbdda111c0ce1166406f6 100644 (file)
@@ -1,12 +1,44 @@
-<% log_uuids = [@object.uuid] + pipeline_jobs(@object).collect{|x|x[:job].andand[:uuid]}.compact %>
-<% log_history = stderr_log_history(log_uuids) %>
-<div id="event_log_div"
-     class="arv-log-event-listener arv-log-event-handler-append-logs arv-log-event-subscribe-to-pipeline-job-uuids arv-job-log-window"
-     data-object-uuids="<%= log_uuids.join(' ') %>"
-     ><%= log_history.join("\n") %></div>
-
-<%# Applying a long throttle suppresses the auto-refresh of this
-    partial that would normally be triggered by arv-log-event. %>
-<div class="arv-log-refresh-control"
-     data-load-throttle="86486400000" <%# 1001 nights %>
-     ></div>
+<% log_ids = @object.job_log_ids
+   still_logging, done_logging = log_ids.keys.partition { |k| log_ids[k].nil? }
+%>
+
+<% unless done_logging.empty? %>
+  <table class="topalign table table-condensed table-fixedlayout">
+    <colgroup>
+      <col width="40%" />
+      <col width="60%" />
+    </colgroup>
+    <thead>
+      <tr>
+        <th>finished component</th>
+        <th>job log</th>
+      </tr>
+    </thead>
+    <tbody>
+      <% done_logging.each do |cname| %>
+      <tr>
+        <td><%= cname %></td>
+        <td><%= link_to("Log for #{cname}",
+                {controller: "collections", action: "show", id: log_ids[cname]})
+                %></td>
+      </tr>
+      <% end %>
+    </tbody>
+  </table>
+<% end %>
+
+<% unless still_logging.empty? %>
+  <h4>Logs in progress</h4>
+
+  <div id="event_log_div"
+       class="arv-log-event-listener arv-log-event-handler-append-logs arv-log-event-subscribe-to-pipeline-job-uuids arv-job-log-window"
+       data-object-uuids="<%= @object.stderr_log_object_uuids.join(' ') %>"
+       ><%= @object.stderr_log_lines.join("\n") %></div>
+
+  <%# Applying a long throttle suppresses the auto-refresh of this
+      partial that would normally be triggered by arv-log-event. %>
+  <div class="arv-log-refresh-control"
+       data-load-throttle="86486400000" <%# 1001 nights %>
+       ></div>
+<% end %>
+
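The "1001 nights" throttle in the refresh control above is easier to audit written out as arithmetic:

```ruby
# 1001 days in milliseconds; matches the data-load-throttle value above.
1001 * 24 * 60 * 60 * 1000  #=> 86486400000
```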
index fb9a30593adb5682be73b4e6eab48c47f0176dd6..04696845fd751a149cc55b28442cb41b3dea6ed6 100644 (file)
@@ -6,7 +6,7 @@
       <div class="col-sm-4">
         <%= render :partial => "show_object_button", :locals => {object: object, size: 'xs'} %>
         <% if object.respond_to?(:name) %>
-          <%= render_editable_attribute object, 'name', nil, {tiptitle: 'rename'} %>
+          <%= render_editable_attribute object, 'name', nil, {}, {tiptitle: 'rename'} %>
         <% else %>
           <%= object.class_for_display %> <%= object.uuid %>
         <% end %>
index a6a371275b35b54782c5e647eb611efd887090f7..5f0f60b0b26a18d2f6a5285144efdb8c96e080a4 100644 (file)
@@ -26,7 +26,9 @@
     </td>
 
     <td>
-      <%= render_editable_attribute (name_link || object), 'name', nil, {tiptitle: 'rename'} %>
+      <% if object.respond_to?(:name) %>
+        <%= render_editable_attribute (name_link || object), 'name', nil, {}, {tiptitle: 'rename'} %>
+      <% end %>
     </td>
 
     <td class="arv-description-in-table">
index 437f05a4998f996c203cfca10a5b5cb4caeb3939..47a2723547d5851c1cc9cfdc8f5b532ecacadb93 100644 (file)
@@ -1,4 +1,3 @@
-
   <div class="row">
     <div class="col-md-6">
       <div class="panel panel-default" style="min-height: 10.5em">
@@ -65,7 +64,7 @@
             </div>
           <% end %>
           <% end %>
-          </div>
+        </div>
       </div>
 
       <div class="panel panel-default">
           <% r = recent_collections(8) %>
           <% r[:collections].each do |p| %>
             <div class="dashboard-panel-info-row">
-            <div>
-              <i class="fa fa-fw fa-folder-o"></i><%= link_to_if_arvados_object r[:owners][p[:owner_uuid]], friendly_name: true %>/
-              <span class="pull-right"><%= render_localized_date(p[:modified_at], "noseconds") %></span>
-            </div>
-            <div class="text-overflow-ellipsis" style="margin-left: 1em; width: 100%"><%= link_to_if_arvados_object p, {friendly_name: true, no_tags: true} %>
-            </div>
+              <div>
+                <i class="fa fa-fw fa-folder-o"></i><%= link_to_if_arvados_object r[:owners][p[:owner_uuid]], friendly_name: true %>/
+                <span class="pull-right"><%= render_localized_date(p[:modified_at], "noseconds") %></span>
+              </div>
+              <div class="text-overflow-ellipsis" style="margin-left: 1em; width: 100%"><%= link_to_if_arvados_object p, {friendly_name: true, no_tags: true} %>
+              </div>
             </div>
           <% end %>
         </div>
       </div>
     </div>
   </div>
-
-</div>
index 726094074b1de7b810d4221353fda9e756ee12b0..443f359036a1776e655e32c36cd090c93870fd73 100644 (file)
@@ -1,5 +1,5 @@
 <% if @object.respond_to? :description %>
   <div class="arv-description-as-subtitle">
-    <%= render_editable_attribute @object, 'description', nil, { 'data-emptytext' => "(No description provided)", 'data-toggle' => 'manual' } %>
+    <%= render_editable_attribute @object, 'description', nil, { 'data-emptytext' => "(No description provided)", 'data-toggle' => 'manual', 'data-mode' => 'inline', 'data-rows' => 10 }, { btntext: 'Edit', btnclass: 'primary', btnplacement: :top } %>
   </div>
 <% end %>
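All three render_editable_attribute call sites in the hunks above gain a fourth positional argument (an empty hash) ahead of the trailing options, which implies the helper now separates HTML/x-editable attributes from Workbench-specific options. The assumed shape, with hypothetical parameter names:

```ruby
# Hypothetical signature inferred from the call sites; the real helper lives
# in application_helper.rb and may differ in naming and defaults.
def render_editable_attribute(object, attr, attrvalue = nil, htmloptions = {}, options = {})
  # htmloptions: data-* attributes for x-editable (emptytext, toggle, mode, rows)
  # options: Workbench extras (tiptitle, btntext, btnclass, btnplacement)
end
```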
index 0b308db6ff072e1f22c833831a586707cfbaa652..a33a1cfd355186c6effc6bfc07eb0f7b043defff 100644 (file)
           <% end %>
         </ul>
       </div>
+      <div class="btn-group btn-group-sm">
+        <button id="select-all" type="button" class="btn btn-default" onClick="select_all_items()">Select all</button>
+        <button id="unselect-all" type="button" class="btn btn-default" onClick="unselect_all_items()">Unselect all</button>
+      </div>
     </div>
     <div class="col-sm-4 pull-right">
       <input type="text" class="form-control filterable-control" placeholder="Search project contents" data-filterable-target="table.arv-index.arv-project-<%= tab_pane %> tbody"/>
     </div>
   </div>
 
-  <table class="table table-condensed arv-index arv-project-<%= tab_pane %>">
+  <table class="table table-condensed arv-index arv-selectable-items arv-project-<%= tab_pane %>">
     <colgroup>
       <col width="0*" style="max-width: fit-content;" />
       <col width="0*" style="max-width: fit-content;" />
diff --git a/apps/workbench/app/views/projects/public.html.erb b/apps/workbench/app/views/projects/public.html.erb
new file mode 100644 (file)
index 0000000..8a57461
--- /dev/null
@@ -0,0 +1,29 @@
+<table class="table">
+  <colgroup>
+    <col width="25%" />
+    <col width="75%" />
+  </colgroup>
+  <thead>
+    <tr class="contain-align-left">
+      <th>
+        Name
+      </th>
+      <th>
+        Description
+      </th>
+    </tr>
+  </thead>
+
+  <tbody>
+  <% @objects.each do |p| %>
+    <tr>
+      <td>
+        <%= link_to_if_arvados_object p, {friendly_name: true} %>
+      </td>
+      <td>
+        <%= render_attribute_as_textile(p, "description", p.description, true) %>
+      </td>
+    </tr>
+  <% end %>
+  </tbody>
+</table>
diff --git a/apps/workbench/app/views/repositories/_repository_breadcrumbs.html.erb b/apps/workbench/app/views/repositories/_repository_breadcrumbs.html.erb
new file mode 100644 (file)
index 0000000..14f9ba7
--- /dev/null
@@ -0,0 +1,13 @@
+<div class="pull-right">
+  <span class="deemphasize">Browsing <%= @object.name %> repository at commit</span>
+  <%= link_to(@commit, show_repository_commit_path(id: @object.uuid, commit: @commit), title: 'show commit message') %>
+</div>
+<p>
+  <%= link_to(@object.name, show_repository_tree_path(id: @object.uuid, commit: @commit, path: ''), title: 'show root directory of source tree') %>
+  <% parents = ''
+     (@path || '').split('/').each do |pathpart|
+       parents = parents + pathpart + '/'
+     %>
+    / <%= link_to pathpart, show_repository_tree_path(id: @object.uuid, commit: @commit, path: parents) %>
+  <% end %>
+</p>
diff --git a/apps/workbench/app/views/repositories/show_blob.html.erb b/apps/workbench/app/views/repositories/show_blob.html.erb
new file mode 100644 (file)
index 0000000..acc34d1
--- /dev/null
@@ -0,0 +1,13 @@
+<%= render partial: 'repository_breadcrumbs' %>
+
+<% if not @blobdata.valid_encoding? %>
+  <div class="alert alert-warning">
+    <p>
+      This file has an invalid text encoding, so it can't be shown
+      here.  (This probably just means it's a binary file, not a text
+      file.)
+    </p>
+  </div>
+<% else %>
+  <pre><%= @blobdata %></pre>
+<% end %>
diff --git a/apps/workbench/app/views/repositories/show_commit.html.erb b/apps/workbench/app/views/repositories/show_commit.html.erb
new file mode 100644 (file)
index 0000000..3690be6
--- /dev/null
@@ -0,0 +1,3 @@
+<%= render partial: 'repository_breadcrumbs' %>
+
+<pre><%= @object.show @commit %></pre>
diff --git a/apps/workbench/app/views/repositories/show_tree.html.erb b/apps/workbench/app/views/repositories/show_tree.html.erb
new file mode 100644 (file)
index 0000000..4e2fcec
--- /dev/null
@@ -0,0 +1,40 @@
+<%= render partial: 'repository_breadcrumbs' %>
+
+<table class="table table-condensed table-hover">
+  <thead>
+    <tr>
+      <th>File</th>
+      <th class="data-size">Size</th>
+    </tr>
+  </thead>
+  <tbody>
+    <% @subtree.each do |mode, sha1, size, subpath| %>
+      <tr>
+        <td>
+          <span style="opacity: 0.6">
+            <% pathparts = subpath.sub(/^\//, '').split('/')
+               basename = pathparts.pop
+               parents = @path
+               pathparts.each do |pathpart| %>
+              <% parents = parents + '/' + pathpart %>
+              <%= link_to pathpart, url_for(path: parents) %>
+              /
+            <% end %>
+          </span>
+          <%= link_to basename, url_for(action: :show_blob, path: parents + '/' + basename) %>
+        </td>
+        <td class="data-size">
+          <%= human_readable_bytes_html(size) %>
+        </td>
+      </tr>
+    <% end %>
+    <% if @subtree.empty? %>
+      <tr>
+        <td colspan="2">
+          No files found.
+        </td>
+      </tr>
+    <% end %>
+  </tbody>
+  <tfoot></tfoot>
+</table>
diff --git a/apps/workbench/app/views/users/_add_repository_modal.html.erb b/apps/workbench/app/views/users/_add_repository_modal.html.erb
new file mode 100644 (file)
index 0000000..db74ec5
--- /dev/null
@@ -0,0 +1,41 @@
+<%
+   if current_user.uuid.ends_with?("-000000000000000")
+     repo_prefix = ""
+   else
+     repo_prefix = current_user.username + "/"
+   end
+-%>
+<div class="modal" id="add-repository-modal" tabindex="-1" role="dialog" aria-labelledby="add-repository-label" aria-hidden="true">
+  <div class="modal-dialog">
+    <div class="modal-content">
+      <form id="add-repository-form">
+        <input type="hidden" id="add_repo_owner_uuid" name="add_repo_owner_uuid" value="<%= current_user.uuid %>">
+        <input type="hidden" id="add_repo_prefix" name="add_repo_prefix" value="<%= repo_prefix %>">
+        <div class="modal-header">
+          <button type="button" class="close" data-dismiss="modal" aria-label="Close"><span aria-hidden="true">&times;</span></button>
+          <h4 class="modal-title" id="add-repository-label">Add new repository</h4>
+        </div>
+        <div class="modal-body form-horizontal">
+          <div class="form-group">
+            <label for="add_repo_basename" class="col-sm-2 control-label">Name</label>
+            <div class="col-sm-10">
+              <div class="input-group arvados-uuid">
+                <% unless repo_prefix.empty? %>
+                  <span class="input-group-addon"><%= repo_prefix %></span>
+                <% end %>
+                <input type="text" class="form-control" id="add_repo_basename" name="add_repo_basename">
+                <span class="input-group-addon">.git</span>
+              </div>
+            </div>
+          </div>
+          <p class="alert alert-info">It may take a minute or two before you can clone your new repository.</p>
+          <p id="add-repository-error" class="alert alert-danger"></p>
+        </div>
+        <div class="modal-footer">
+          <button type="button" class="btn btn-default" data-dismiss="modal">Cancel</button>
+          <input type="submit" class="btn btn-primary" id="add-repository-submit" name="submit" value="Create">
+        </div>
+      </form>
+    </div>
+  </div>
+</div>
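The prefix computed at the top of this modal encodes the repository naming rule: the system root user (UUID ending in -000000000000000) creates top-level repositories, while everyone else gets names under their username. The same rule as a short sketch:

```ruby
# Root gets bare repository names; other users get "username/name".
def repo_prefix_for(user)
  user.uuid.end_with?("-000000000000000") ? "" : "#{user.username}/"
end
```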
index 25f4d75be19b0df6a450942bfd931f59aadc25c6..e6314cd1587cf4e4d9204e597c5cf69f2531b13b 100644 (file)
@@ -29,7 +29,7 @@
       </div>
 
       <div class="modal-footer">
-        <button class="btn btn-default" onClick="reset_form()" data-dismiss="modal" aria-hidden="true">Cancel</button>
+        <button type="button" class="btn btn-default" onClick="reset_form()" data-dismiss="modal" aria-hidden="true">Cancel</button>
         <button type="submit" class="btn btn-primary" autofocus>Submit</button>
       </div>
 
similarity index 52%
rename from apps/workbench/app/views/users/_manage_current_token.html.erb
rename to apps/workbench/app/views/users/_current_token.html.erb
index 71c6bd246ba7a416b789ca9c12e9e62341f0dc55..2bf9f4d87b7a460c1620e7795d5cd694d8624c8b 100644 (file)
@@ -1,5 +1,15 @@
+<div class="panel panel-default">
+  <div class="panel-heading">
+    <h4 class="panel-title">
+      <a data-parent="#arv-adv-accordion" href="/current_token">
+        Current Token
+      </a>
+    </h4>
+  </div>
+
+<div id="#manage_current_token" class="panel-body">
 <p>The Arvados API token is a secret key that enables the Arvados SDKs to access Arvados with the proper permissions. For more information see <%= link_to raw('Getting an API token'), "#{Rails.configuration.arvados_docsite}/user/reference/api-tokens.html", target: "_blank"%>.</p>
-<p>Paste the following lines at a shell prompt to set up the necessary environment for Arvados SDKs to authenticate to your account, <b><%= current_user.email %></b></p>
+<p>Paste the following lines at a shell prompt to set up the necessary environment for Arvados SDKs to authenticate to your <b><%= current_user.username %></b> account.</p>
 
 <pre>
 HISTIGNORE=$HISTIGNORE:'export ARVADOS_API_TOKEN=*'
@@ -11,3 +21,6 @@ export ARVADOS_API_HOST_INSECURE=true
 unset ARVADOS_API_HOST_INSECURE
 <% end %>
 </pre>
+<p>Arvados <%= link_to virtual_machines_user_path(current_user) do %>virtual machines<% end %> do this for you automatically. This setup is needed only when you use the API remotely (e.g., from your own workstation).</p>
+</div>
+</div>
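For context, the variables exported in the pre block above are the ones the Arvados SDKs read at startup, so no code changes are needed after pasting them. A sketch of what client code sees (the host value is illustrative):

```ruby
ENV['ARVADOS_API_HOST']   # e.g. "xyzzy.arvadosapi.com" (illustrative)
ENV['ARVADOS_API_TOKEN']  # the secret; the HISTIGNORE line keeps it out of shell history
```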
diff --git a/apps/workbench/app/views/users/_manage_account.html.erb b/apps/workbench/app/views/users/_manage_account.html.erb
deleted file mode 100644 (file)
index 5024fce..0000000
+++ /dev/null
@@ -1,52 +0,0 @@
-<div class="col-sm-6">
-  <div class="panel-group" id="arv-adv-accordion">
-    <% ['Virtual Machines',
-       'Repositories'].each do |section| %>
-      <% section_id = section.gsub(" ","_").downcase %>
-      <div class="panel panel-default">
-        <div class="panel-heading">
-          <h4 class="panel-title">
-            <a data-parent="#arv-adv-accordion" href="#manage_<%=section_id%>">
-              <%= section %>
-            </a>
-          </h4>
-        </div>
-        <div id="manage_<%=section_id%>">
-          <div class="panel-body">
-            <%= render partial: "manage_#{section_id}" %>
-          </div>
-        </div>
-      </div>
-    <% end %>
-  </div>
-</div>
-<div class="col-sm-6">
-  <div class="panel-group" id="arv-adv-accordion">
-    <% ['SSH Keys',
-      'Current Token'].each do |section| %>
-      <% section_id = section.gsub(" ","_").downcase %>
-      <div class="panel panel-default">
-        <div class="panel-heading">
-          <% if section_id == 'ssh_keys' %>
-            <div class="pull-right">
-              <%= link_to raw('<i class="fa fa-plus"></i>' " Add new SSH key"), add_ssh_key_popup_url,
-                           {class: 'btn btn-xs btn-primary', :remote => true, 'data-toggle' =>  "modal",
-                            'data-target' => '#add-ssh-key-modal-window'}  %>
-            </div>
-          <% end %>
-          <h4 class="panel-title">
-            <a data-parent="#arv-adv-accordion" href="#manage_<%=section_id%>">
-              <%= section %>
-            </a>
-          </h4>
-        </div>
-        <div id="manage_<%=section_id%>">
-          <div class="panel-body">
-            <%= render partial: "manage_#{section_id}" %>
-          </div>
-        </div>
-      </div>
-    <% end %>
-  </div>
-  <div id="add-ssh-key-modal-window" class="modal fade" role="dialog" aria-labelledby="myModalLabel" aria-hidden="true"></div>
-</div>
similarity index 58%
rename from apps/workbench/app/views/users/_manage_repositories.html.erb
rename to apps/workbench/app/views/users/_repositories.html.erb
index 83ec30a8146f9311a03f817568ccd46f14d05075..d409b8c44b3a1af64591c764e50407bbf4e3c211 100644 (file)
@@ -1,4 +1,18 @@
-<div>
+<div class="panel panel-default">
+  <div class="panel-heading">
+    <div class="pull-right">
+      <%= link_to raw('<i class="fa fa-plus"></i> Add new repository'), "#",
+                   {class: 'btn btn-xs btn-primary', 'data-toggle' => "modal",
+                    'data-target' => '#add-repository-modal'}  %>
+    </div>
+    <h4 class="panel-title">
+      <%= link_to repositories_user_path(current_user) do%>
+        Repositories
+      <%end%>
+    </h4>
+  </div>
+
+<div id="manage_repositories" class="panel-body">
   <p>
     For more information see <%= link_to raw('Writing a pipeline'),
     "#{Rails.configuration.arvados_docsite}/user/tutorials/tutorial-firstscript.html", target: "_blank"%>.
@@ -32,7 +46,8 @@
               <%= writable ? 'writable' : 'read-only' %>
             </td>
             <td style="word-break:break-all;">
-              <code><%= writable ? repo[:push_url] : repo[:fetch_url] %></code>
+              <code><%= repo.http_fetch_url %></code><br/>
+              <code><%= writable ? repo.push_url : repo.fetch_url %></code>
             </td>
             <td>
               <% if writable == 'can_manage' %>
@@ -45,3 +60,6 @@
     </table>
   <% end %>
 </div>
+</div>
+  <p>When you are using an Arvados virtual machine, you should clone the https:// URLs. This will authenticate automatically using your API token.</p>
+  <p>To clone git repositories using SSH, <%= link_to ssh_keys_user_path(current_user) do %>add an SSH key to your account<% end %> and clone the git@ URLs.</p>
index a1a664ce838115b875c460cd988d5260490430f1..36869cde2f52ba30e1b75a7993e3697f3b2e7529 100644 (file)
@@ -7,7 +7,7 @@
     <div class="modal-header">
       <button type="button" class="close" onClick="reset_form()" data-dismiss="modal" aria-hidden="true">&times;</button>
       <div>
-        <div class="col-sm-6"> <h4 class="modal-title">Setup User</h4> </div>
+        <div class="col-sm-6"> <h4 class="modal-title">Setup Shell Account</h4> </div>
         <div class="spinner spinner-32px spinner-h-center col-sm-1" hidden="true"></div>
       </div>
       <br/>
       <% disable_email = uuid != nil %>
       <% identity_url_prefix = @current_selections[:identity_url_prefix] %>
       <% disable_url_prefix = identity_url_prefix != nil %>
-      <% selected_repo = @current_selections[:repo_name] %>
       <% selected_vm = @current_selections[:vm_uuid] %>
+      <% groups = @current_selections[:groups] %>
 
       <input id="user_uuid" maxlength="250" name="user_uuid" type="hidden" value="<%=uuid%>">
       <div class="form-group">
-       <label for="email">Email</label>
+        <label for="email">Email</label>
         <% if disable_email %>
         <input class="form-control" id="email" maxlength="250" name="email" type="text" value="<%=email%>" disabled>
         <% else %>
                value="<%= Rails.configuration.default_openid_prefix %>">
         <% end %>
       </div>
-      <div class="form-group">
-        <label for="repo_name">Repository Name and Shell Login</label>
-        <input class="form-control" id="repo_name" maxlength="250" name="repo_name" type="text" value="<%=selected_repo%>">
-      </div>
       <div class="form-group">
         <label for="vm_uuid">Virtual Machine</label>
         <select class="form-control" name="vm_uuid">
           <option value="" <%= 'selected' unless selected_vm %>>
-           Choose One:
-         </option>
+            Choose One:
+          </option>
           <% @vms.each do |vm| %>
             <option value="<%=vm.uuid%>"
-                   <%= 'selected' if selected_vm == vm.uuid %>>
-             <%= vm.hostname %>
-           </option>
+              <%= 'selected' if selected_vm == vm.uuid %>>
+              <%= vm.hostname %>
+            </option>
           <% end %>
         </select>
       </div>
+      <div class="groups-group">
+        <label for="groups">Groups for virtual machine (comma separated list)</label>
+        <input class="form-control" id="groups" maxlength="250" name="groups" type="text" value="<%=groups%>">
+      </div>
     </div>
 
     <div class="modal-footer">
index a34d7e6949ba1df3f9073896db5d2e61a67a9be8..54643a1c189476ae55b99e526b26ad85cd492ae8 100644 (file)
     </blockquote>
 
     <p>
-      As an admin, you can setup this user. Please input a VM and
-      repository for the user. If you had previously provided any of
-      these items, they are pre-filled for you and you can leave them
-      as is if you would like to reuse them.
+      As an admin, you can set up a shell account for this user.
+      The login name is automatically generated from the user's e-mail address.
     </p>
 
     <blockquote>
-      <%= link_to "Setup #{@object.full_name}", setup_popup_user_url(id: @object.uuid),  {class: 'btn btn-primary', :remote => true, 'data-toggle' =>  "modal", 'data-target' => '#user-setup-modal-window'}  %>
+      <%= link_to "Setup shell account #{'for ' if @object.full_name.present?} #{@object.full_name}", setup_popup_user_url(id: @object.uuid),  {class: 'btn btn-primary', :remote => true, 'data-toggle' =>  "modal", 'data-target' => '#user-setup-modal-window'}  %>
     </blockquote>
 
     <p>
similarity index 73%
rename from apps/workbench/app/views/users/_manage_ssh_keys.html.erb
rename to apps/workbench/app/views/users/_ssh_keys.html.erb
index 1ea8f0bf87f698a5199cd357163c7f07df973796..794e89c8867ca9afe7f91ca7e0c049bc45794375 100644 (file)
@@ -1,10 +1,23 @@
-<div>
+<div class="panel panel-default">
+  <div class="panel-heading">
+    <div class="pull-right">
+      <%= link_to raw('<i class="fa fa-plus"></i>' " Add new SSH key"), add_ssh_key_popup_url,
+                   {class: 'btn btn-xs btn-primary', :remote => true, 'data-toggle' =>  "modal",
+                    'data-target' => '#add-ssh-key-modal-window'}  %>
+    </div>
+    <h4 class="panel-title">
+      <%= link_to ssh_keys_user_path(current_user) do %>
+        SSH Keys
+      <%end%>
+    </h4>
+  </div>
+
+<div id="manage_ssh_keys" class="panel-body">
   <% if !@my_ssh_keys.any? %>
-     <p> You have not yet set up an SSH public key for use with Arvados. </p>
-     <p>  <%= link_to "Click here",
+     <p> You have not yet set up an SSH public key for use with Arvados. <%= link_to "Learn more.",
                   "#{Rails.configuration.arvados_docsite}/user/getting_started/ssh-access-unix.html",
                   style: "font-weight: bold",
-                  target: "_blank" %>  to learn about SSH keys in Arvados.
+                  target: "_blank" %>
      </p>
      <p> When you have an SSH key you would like to use, add it using the <b>Add</b> button. </p>
   <% else %>
@@ -53,3 +66,4 @@
     </table>
   <% end %>
 </div>
+</div>
similarity index 70%
rename from apps/workbench/app/views/users/_manage_virtual_machines.html.erb
rename to apps/workbench/app/views/users/_virtual_machines.html.erb
index 43f2b7787c6f8e83bbf66bbd029bbd4e55fa5412..fc54178dcca9d56860f936121c9d76e3731d148c 100644 (file)
@@ -1,11 +1,18 @@
-<div>
+<div class="panel panel-default">
+  <div class="panel-heading">
+    <h4 class="panel-title">
+      <%= link_to virtual_machines_user_path(current_user) do %>
+        Virtual Machines
+      <% end %>
+
+    </h4>
+  </div>
+
+<div id="manage_virtual_machines" class="panel-body">
   <p>
-    For more information see <%= link_to raw('Arvados Docs &rarr; User Guide &rarr; SSH access'),
-  "#{Rails.configuration.arvados_docsite}/user/getting_started/ssh-access-unix.html",
+    For more information see <%= link_to raw('Arvados Docs &rarr; User Guide &rarr; VM access'),
+  "#{Rails.configuration.arvados_docsite}/user/getting_started/vm-login-with-webshell.html",
   target: "_blank"%>.
-    <% if @my_virtual_machines.any? or true %>
-      A sample <code>~/.ssh/config</code> entry is provided below.
-    <% end %>
   </p>
 
   <% if !@my_virtual_machines.any? %>
@@ -57,6 +64,9 @@
           <th> Host name </th>
           <th> Login name </th>
           <th> Command line </th>
+          <% if Rails.configuration.shell_in_a_box_url %>
+            <th> Web shell <span class="label label-info">beta</span></th>
+          <% end %>
         </tr>
       </thead>
       <tbody>
                 <% end %>
               <% end %>
             </td>
+            <% if Rails.configuration.shell_in_a_box_url %>
+              <td>
+                <% @my_vm_logins[vm[:uuid]].andand.each do |login| %>
+                  <%= link_to webshell_virtual_machine_path(vm, login: login), title: "Open a terminal session in your browser", class: 'btn btn-xs btn-default', target: "_blank" do %>
+                    Log in as <%= login %><br />
+                  <% end %>
+                <% end %>
+              </td>
+            <% end %>
           </tr>
         <% end %>
       </tbody>
     </table>
-
-    <p><i>~/.ssh/config:</i></p>
+  <% end %>
+</div>
+</div>
+  <p>To access virtual machines using SSH, <%= link_to ssh_keys_user_path(current_user) do %>add an SSH key to your account<% end %> and add a section like this to your SSH configuration file (<i>~/.ssh/config</i>):</p>
     <pre>Host *.arvados
       TCPKeepAlive yes
       ServerAliveInterval 60
       ProxyCommand ssh -p2222 turnout@switchyard.<%= current_api_host || 'xyzzy.arvadosapi.com' %> -x -a $SSH_PROXY_FLAGS %h
     </pre>
-  <% end %>
-</div>
diff --git a/apps/workbench/app/views/users/current_token.html.erb b/apps/workbench/app/views/users/current_token.html.erb
new file mode 100644 (file)
index 0000000..82d828b
--- /dev/null
@@ -0,0 +1 @@
+<%= render :partial => 'current_token' %>
diff --git a/apps/workbench/app/views/users/manage_account.html.erb b/apps/workbench/app/views/users/manage_account.html.erb
deleted file mode 100644 (file)
index 0751866..0000000
+++ /dev/null
@@ -1 +0,0 @@
-<%= render :partial => 'manage_account' %>
index 45aea01b957c419375cb103c46aec5b60315c95e..4fefa821caafb88be0a7647112514f98fa7c55f8 100644 (file)
         <div class="panel-body">
           <% if !missing_required_profile? && params[:offer_return_to] %>
             <div class="alert alert-success">
-              <p>Thank you for filling in your profile. <%= link_to 'Back to work!', params[:offer_return_to], class: 'btn btn-sm btn-primary' %></p>
+              <% if current_user.prefs[:getting_started_shown] %>
+                <p>Thank you for filling in your profile. <%= link_to 'Back to work!', params[:offer_return_to], class: 'btn btn-sm btn-primary' %></p>
+              <% else %>
+                <p>Thank you for filling in your profile. <%= link_to 'Get started', params[:offer_return_to], class: 'btn btn-sm btn-primary' %></p>
+              <% end %>
             </div>
           <% else %>
             <div class="alert alert-info">
                 <% end %>
               <% end %>
 
+              <%# If the user has other prefs, we need to preserve them %>
+              <% current_user.prefs.each do |key, value| %>
+                <% if key != :profile %>
+                  <input type="hidden" name="user[prefs][:<%=key%>]" value="<%=value.to_json%>">
+                <% end %>
+              <% end %>
+
               <% if show_save_button %>
                 <div class="form-group">
                   <div class="col-sm-offset-3 col-sm-8">
diff --git a/apps/workbench/app/views/users/repositories.html.erb b/apps/workbench/app/views/users/repositories.html.erb
new file mode 100644 (file)
index 0000000..98a1715
--- /dev/null
@@ -0,0 +1,2 @@
+<%= render :partial => 'repositories' %>
+<%= render partial: "add_repository_modal" %>
diff --git a/apps/workbench/app/views/users/ssh_keys.html.erb b/apps/workbench/app/views/users/ssh_keys.html.erb
new file mode 100644 (file)
index 0000000..d44e642
--- /dev/null
@@ -0,0 +1,2 @@
+<%= render :partial => 'ssh_keys' %>
+<div id="add-ssh-key-modal-window" class="modal fade" role="dialog" aria-labelledby="myModalLabel" aria-hidden="true"></div>
diff --git a/apps/workbench/app/views/users/virtual_machines.html.erb b/apps/workbench/app/views/users/virtual_machines.html.erb
new file mode 100644 (file)
index 0000000..2940b2b
--- /dev/null
@@ -0,0 +1 @@
+<%= render :partial => 'virtual_machines' %>
diff --git a/apps/workbench/app/views/virtual_machines/webshell.html.erb b/apps/workbench/app/views/virtual_machines/webshell.html.erb
new file mode 100644 (file)
index 0000000..a5507b3
--- /dev/null
@@ -0,0 +1,49 @@
+<html>
+  <head>
+    <title><%= @object.hostname %> / <%= Rails.configuration.site_name %></title>
+    <link rel="stylesheet" href="<%= asset_path 'webshell/styles.css' %>" type="text/css">
+    <style type="text/css">
+      body {
+        margin: 0px;
+      }
+    </style>
+    <script type="text/javascript"><!--
+      (function() {
+        // We would like to hide overflowing lines as this can lead to
+        // visually jarring results if the browser substitutes oversized
+        // Unicode characters from different fonts. Unfortunately, a bug
+        // in Firefox prevents it from allowing multi-line text
+        // selections whenever we change the "overflow" style. So, only
+        // do so for non-Netscape browsers.
+        if (typeof navigator.appName == 'undefined' ||
+            navigator.appName != 'Netscape') {
+          document.write('<style type="text/css">' +
+                         '#vt100 #console div, #vt100 #alt_console div {' +
+                         '  overflow: hidden;' +
+                         '}' +
+                         '</style>');
+        }
+      })();
+
+      function login(username, token) {
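+      // Log the user in automatically: type the login name, pause for the
+      // password prompt, then send the API token in place of a password.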
+        var sh = new ShellInABox("<%= j @webshell_url %>");
+        setTimeout(function() {
+          sh.keysPressed("<%= j params[:login] %>\n");
+          setTimeout(function() {
+            sh.keysPressed("<%= j Thread.current[:arvados_api_token] %>\n");
+            sh.vt100('(sent authentication token)\n');
+          }, 2000);
+        }, 2000);
+      }
+    // -->
+</script>
+    <link rel="icon" href="<%= asset_path 'favicon.ico' %>" type="image/x-icon">
+    <script type="text/javascript" src="<%= asset_path 'webshell/shell_in_a_box.js' %>"></script>
+  </head>
+  <!-- Load ShellInABox from a timer as Konqueror sometimes fails to
+       correctly deal with the enclosing frameset (if any), if we do not
+       do this
+   -->
+<body onload="setTimeout(login, 1000)"
+    scroll="no"><noscript>JavaScript must be enabled for ShellInABox</noscript>
+</body>
+</html>
index a4870818800b87e5f03a3774528acaef1d1ff492..744c0c3ba3b7b3b8f1340df03045b554eab1ea82 100644 (file)
@@ -88,7 +88,7 @@ test:
   action_controller.allow_forgery_protection: false
   action_mailer.delivery_method: :test
   active_support.deprecation: :stderr
-  profiling_enabled: false
+  profiling_enabled: true
   secret_token: <%= rand(2**256).to_s(36) %>
   secret_key_base: <%= rand(2**256).to_s(36) %>
 
@@ -131,6 +131,7 @@ common:
   arvados_insecure_https: true
   activation_contact_link: mailto:info@arvados.org
   arvados_docsite: http://doc.arvados.org
+  arvados_public_data_doc_url: http://arvados.org/projects/arvados/wiki/Public_Pipelines_and_Datasets
   arvados_theme: default
   show_user_agreement_inline: false
   secret_token: ~
@@ -138,8 +139,14 @@ common:
   default_openid_prefix: https://www.google.com/accounts/o8/id
   send_user_setup_notification_email: true
 
-  # Set user_profile_form_fields to enable and configure the user profile page.
-  # Default is set to false. A commented setting with full description is provided below.
+  # Scratch directory used by the remote repository browsing
+  # feature. If it doesn't exist, it (and any missing parents) will be
+  # created using mkdir_p.
+  repository_cache: <%= File.expand_path 'tmp/git', Rails.root %>
+
+  # Set user_profile_form_fields to enable and configure the user
+  # profile page. Default is set to false. A commented example with
+  # full description is provided below.
   user_profile_form_fields: false
 
   # Below is a sample setting of user_profile_form_fields config parameter.
@@ -201,3 +208,19 @@ common:
   # the token by running "bundle exec ./script/get_anonymous_user_token.rb"
   # in the directory where your API server is running.
   anonymous_user_token: false
+
+  # When anonymous_user_token is configured, show the public projects page.
+  enable_public_projects_page: true
+
+  # Ask the Arvados API server to compress its response payloads.
+  api_response_compression: true
+
+  # ShellInABox service endpoint URL for a given VM.  If false, do not
+  # offer web shell logins.
+  #
+  # E.g., using a path-based proxy server to forward connections to shell hosts:
+  # https://webshell.uuid_prefix.arvadosapi.com/%{hostname}
+  #
+  # E.g., using a name-based proxy server to forward connections to shell hosts:
+  # https://%{hostname}.webshell.uuid_prefix.arvadosapi.com/
+  shell_in_a_box_url: false
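
The %{hostname} placeholder above is expanded server-side before the URL
reaches the browser; that Ruby code is not part of this hunk. An illustrative
JavaScript equivalent of the substitution (function name hypothetical):

    // Illustrative only; the real substitution happens in the Rails app.
    function webshellUrl(template, hostname) {
      return template.replace('%{hostname}', hostname);
    }
    // webshellUrl('https://%{hostname}.webshell.uuid_prefix.arvadosapi.com/',
    //             'shell1')
    //   => 'https://shell1.webshell.uuid_prefix.arvadosapi.com/'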
index 34a3224cfc2526d936cda60a6c3882dbc760d2a6..39b299cbcef479a029fd249d315017e446980b42 100644 (file)
@@ -1,39 +1,11 @@
-# SQLite version 3.x
-#   gem install sqlite3
-#
-#   Ensure the SQLite 3 gem is defined in your Gemfile
-#   gem 'sqlite3'
+# Note: The database configuration is not actually used.
 development:
-  adapter: sqlite3
-  database: db/development.sqlite3
-  pool: 5
-  timeout: 5000
-
-# Warning: The database defined as "test" will be erased and
-# re-generated from your development database when you run "rake".
-# Do not set this db to the same as development or production.
+  adapter: nulldb
 test:
-  adapter: sqlite3
-  database: db/test.sqlite3
-  pool: 5
-  timeout: 5000
-
+  adapter: nulldb
 production:
-  adapter: sqlite3
-  database: db/production.sqlite3
-  pool: 5
-  timeout: 5000
-
-# Note: The "diagnostics" database configuration is not actually used.
+  adapter: nulldb
 diagnostics:
-  adapter: sqlite3
-  database: db/diagnostics.sqlite3
-  pool: 5
-  timeout: 5000
-
-# Note: The "performance" database configuration is not actually used.
+  adapter: nulldb
 performance:
-  adapter: sqlite3
-  database: db/diagnostics.sqlite3
-  pool: 5
-  timeout: 5000
+  adapter: nulldb
index 51fc81ab753d844ae020cda455220d1619a21cbd..f14c3ca8456b3b252574027f3b4ba53ad501ff85 100644 (file)
@@ -23,11 +23,17 @@ ArvadosWorkbench::Application.configure do
     ks.each do |kk|
       cfg = cfg.send(kk)
     end
-    if cfg.respond_to?(k.to_sym) and !cfg.send(k).nil?
-      # Config must have been set already in environments/*.rb.
+    if v.nil? and cfg.respond_to?(k) and !cfg.send(k).nil?
+      # Config is nil in *.yml, but has been set already in
+      # environments/*.rb (or has a Rails default). Don't overwrite
+      # the default/upstream config with nil.
       #
       # After config files have been migrated, this mechanism should
-      # be deprecated, then removed.
+      # be removed.
+      Rails.logger.warn <<EOS
+DEPRECATED: Inheriting config.#{ks.join '.'} from Rails config.
+            Please move this config into config/application.yml.
+EOS
     elsif v.nil?
       # Config variables are not allowed to be nil. Make a "naughty"
       # list, and present it below.
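
The rule implemented above is: a nil in application.yml must not clobber a
value already set in environments/*.rb (warn and keep the default), while a
key that is nil in both places goes on the "naughty" list. An illustrative
sketch of that merge policy in JavaScript (names hypothetical; the real code
is the Ruby in this hunk):

    // Illustrative merge policy only.
    function mergeConfig(railsDefaults, yaml) {
      var merged = {}, missing = [];
      for (var k in yaml) {
        if (yaml[k] === null && railsDefaults[k] != null) {
          merged[k] = railsDefaults[k];  // keep inherited default, but warn
          console.warn('DEPRECATED: inheriting config.' + k +
                       ' from Rails config.');
        } else if (yaml[k] === null) {
          missing.push(k);               // nil everywhere: flag it
        } else {
          merged[k] = yaml[k];
        }
      }
      return {merged: merged, missing: missing};
    }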
index 7ed02e7dc9ba11aa8beb07cae5f9c934ce77fd9f..487fb3fdd329a06e13bfa35216de0517e9822c6c 100644 (file)
@@ -17,6 +17,7 @@ ArvadosWorkbench::Application.routes.draw do
   resources :traits
   resources :api_client_authorizations
   resources :virtual_machines
+  get '/virtual_machines/:id/webshell/:login' => 'virtual_machines#webshell', :as => :webshell_virtual_machine
   resources :authorized_keys
   resources :job_tasks
   resources :jobs do
@@ -26,6 +27,11 @@ ArvadosWorkbench::Application.routes.draw do
   resources :repositories do
     post 'share_with', on: :member
   end
+  # {format: false} prevents rails from treating "foo.png" as foo?format=png
+  get '/repositories/:id/tree/:commit' => 'repositories#show_tree'
+  get '/repositories/:id/tree/:commit/*path' => 'repositories#show_tree', as: :show_repository_tree, format: false
+  get '/repositories/:id/blob/:commit/*path' => 'repositories#show_blob', as: :show_repository_blob, format: false
+  get '/repositories/:id/commit/:commit' => 'repositories#show_commit', as: :show_repository_commit
   match '/logout' => 'sessions#destroy', via: [:get, :post]
   get '/logged_out' => 'sessions#index'
   resources :users do
@@ -40,8 +46,11 @@ ArvadosWorkbench::Application.routes.draw do
     get 'setup_popup', :on => :member
     get 'profile', :on => :member
     post 'request_shell_access', :on => :member
+    get 'virtual_machines', :on => :member
+    get 'repositories', :on => :member
+    get 'ssh_keys', :on => :member
   end
-  get '/manage_account' => 'users#manage_account'
+  get '/current_token' => 'users#current_token'
   get "/add_ssh_key_popup" => 'users#add_ssh_key_popup', :as => :add_ssh_key_popup
   get "/add_ssh_key" => 'users#add_ssh_key', :as => :add_ssh_key
   resources :logs
@@ -77,7 +86,9 @@ ArvadosWorkbench::Application.routes.draw do
     get 'choose', on: :collection
     post 'share_with', on: :member
     get 'tab_counts', on: :member
+    get 'public', on: :collection
   end
+
   resources :search do
     get 'choose', :on => :collection
   end
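
The webshell route above takes both a VM id and a login name in the path. A
hypothetical client-side helper that builds such a path (Workbench itself
uses the webshell_virtual_machine route helper in Ruby):

    // Hypothetical; mirrors get '/virtual_machines/:id/webshell/:login'.
    function webshellPath(vmUuid, login) {
      return '/virtual_machines/' + encodeURIComponent(vmUuid) +
             '/webshell/' + encodeURIComponent(login);
    }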
diff --git a/apps/workbench/public/webshell/README b/apps/workbench/public/webshell/README
new file mode 100644 (file)
index 0000000..b8920c5
--- /dev/null
@@ -0,0 +1,3 @@
+See also
+* VirtualMachinesController#webshell
+* https://code.google.com/p/shellinabox/source/browse/#git%2Fshellinabox
diff --git a/apps/workbench/public/webshell/enabled.gif b/apps/workbench/public/webshell/enabled.gif
new file mode 100644 (file)
index 0000000..07936e2
Binary files /dev/null and b/apps/workbench/public/webshell/enabled.gif differ
diff --git a/apps/workbench/public/webshell/keyboard.html b/apps/workbench/public/webshell/keyboard.html
new file mode 100644 (file)
index 0000000..6a95f3b
--- /dev/null
@@ -0,0 +1,62 @@
+<?xml version="1.0" encoding="utf-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" xmlns:v="urn:schemas-microsoft-com:vml" xml:lang="en" lang="en">
+<head>
+</head>
+<body><pre class="box"><div
+  ><i id="27">Esc</i><i id="112">F1</i><i id="113">F2</i><i id="114">F3</i
+  ><i id="115">F4</i><i id="116">F5</i><i id="117">F6</i><i id="118">F7</i
+  ><i id="119">F8</i><i id="120">F9</i><i id="121">F10</i><i id="122">F11</i
+  ><i id="123">F12</i><br
+  /><b><span class="unshifted">`</span><span class="shifted">~</span></b
+    ><b><span class="unshifted">1</span><span class="shifted">!</span></b
+    ><b><span class="unshifted">2</span><span class="shifted">@</span></b
+    ><b><span class="unshifted">3</span><span class="shifted">#</span></b
+    ><b><span class="unshifted">4</span><span class="shifted">&#36;</span></b
+    ><b><span class="unshifted">5</span><span class="shifted">&#37;</span></b
+    ><b><span class="unshifted">6</span><span class="shifted">^</span></b
+    ><b><span class="unshifted">7</span><span class="shifted">&amp;</span></b
+    ><b><span class="unshifted">8</span><span class="shifted">*</span></b
+    ><b><span class="unshifted">9</span><span class="shifted">(</span></b
+    ><b><span class="unshifted">0</span><span class="shifted">)</span></b
+    ><b><span class="unshifted">-</span><span class="shifted">_</span></b
+    ><b><span class="unshifted">=</span><span class="shifted">+</span></b
+    ><i id="8">&nbsp;&larr;&nbsp;</i
+    ><br
+  /><i id="9">Tab</i
+    ><b>Q</b><b>W</b><b>E</b><b>R</b><b>T</b><b>Y</b><b>U</b><b>I</b><b>O</b
+    ><b>P</b
+    ><b><span class="unshifted">[</span><span class="shifted">{</span></b
+    ><b><span class="unshifted">]</span><span class="shifted">}</span></b
+    ><b><span class="unshifted">&#92;</span><span class="shifted">|</span></b
+    ><br
+  /><u>Tab&nbsp;&nbsp;</u
+    ><b>A</b><b>S</b><b>D</b><b>F</b><b>G</b><b>H</b><b>J</b><b>K</b><b>L</b
+    ><b><span class="unshifted">;</span><span class="shifted">:</span></b
+    ><b><span class="unshifted">&#39;</span><span class="shifted">"</span></b
+    ><i id="13">Enter</i
+    ><br
+  /><u>&nbsp;&nbsp;</u
+    ><i id="16">Shift</i
+    ><b>Z</b><b>X</b><b>C</b><b>V</b><b>B</b><b>N</b><b>M</b
+    ><b><span class="unshifted">,</span><span class="shifted">&lt;</span></b
+    ><b><span class="unshifted">.</span><span class="shifted">&gt;</span></b
+    ><b><span class="unshifted">/</span><span class="shifted">?</span></b
+    ><i id="16">Shift</i
+    ><br
+  /><u>XXX</u
+    ><i id="17">Ctrl</i
+    ><i id="18">Alt</i
+    ><i style="width: 25ex">&nbsp;</i
+  ></div
+  >&nbsp;&nbsp;&nbsp;<div
+    ><i id="45">Ins</i><i id="46">Del</i><i id="36">Home</i><i id="35">End</i
+    ><br
+    /><u>&nbsp;</u><br
+    /><u>&nbsp;</u><br
+    /><u>Ins</u><s>&nbsp;</s><b id="38">&uarr;</b><s>&nbsp;</s><u>&nbsp;</u
+      ><b id="33">&uArr;</b><br
+    /><u>Ins</u><b id="37">&larr;</b><b id="40">&darr;</b
+      ><b id="39">&rarr;</b><u>&nbsp;</u><b id="34">&dArr;</b
+  ></div
+></pre></body></html>
diff --git a/apps/workbench/public/webshell/keyboard.png b/apps/workbench/public/webshell/keyboard.png
new file mode 100644 (file)
index 0000000..feef519
Binary files /dev/null and b/apps/workbench/public/webshell/keyboard.png differ
diff --git a/apps/workbench/public/webshell/shell_in_a_box.js b/apps/workbench/public/webshell/shell_in_a_box.js
new file mode 100644 (file)
index 0000000..0c7e800
--- /dev/null
@@ -0,0 +1,4835 @@
+// This file contains code from shell_in_a_box.js and vt100.js
+
+
+// ShellInABox.js -- Use XMLHttpRequest to provide an AJAX terminal emulator.
+// Copyright (C) 2008-2010 Markus Gutschke <markus@shellinabox.com>
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License version 2 as
+// published by the Free Software Foundation.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// In addition to these license terms, the author grants the following
+// additional rights:
+//
+// If you modify this program, or any covered work, by linking or
+// combining it with the OpenSSL project's OpenSSL library (or a
+// modified version of that library), containing parts covered by the
+// terms of the OpenSSL or SSLeay licenses, the author
+// grants you additional permission to convey the resulting work.
+// Corresponding Source for a non-source form of such a combination
+// shall include the source code for the parts of OpenSSL used as well
+// as that of the covered work.
+//
+// You may at your option choose to remove this additional permission from
+// the work, or from any part of it.
+//
+// It is possible to build this program in a way that it loads OpenSSL
+// libraries at run-time. If doing so, the following notices are required
+// by the OpenSSL and SSLeay licenses:
+//
+// This product includes software developed by the OpenSSL Project
+// for use in the OpenSSL Toolkit. (http://www.openssl.org/)
+//
+// This product includes cryptographic software written by Eric Young
+// (eay@cryptsoft.com)
+//
+//
+// The most up-to-date version of this program is always available from
+// http://shellinabox.com
+//
+//
+// Notes:
+//
+// The author believes that for the purposes of this license, you meet the
+// requirements for publishing the source code, if your web server publishes
+// the source in unmodified form (i.e. with licensing information, comments,
+// formatting, and identifier names intact). If there are technical reasons
+// that require you to make changes to the source code when serving the
+// JavaScript (e.g. to remove pre-processor directives from the source), these
+// changes should be done in a reversible fashion.
+//
+// The author does not consider websites that reference this script in
+// unmodified form, and web servers that serve this script in unmodified form
+// to be derived works. As such, they are believed to be outside of the
+// scope of this license and not subject to the rights or restrictions of the
+// GNU General Public License.
+//
+// If in doubt, consult a legal professional familiar with the laws that
+// apply in your country.
+
+// #define XHR_UNINITIALIZED 0
+// #define XHR_OPEN        1
+// #define XHR_SENT        2
+// #define XHR_RECEIVING   3
+// #define XHR_LOADED      4
+
+// IE does not define XMLHttpRequest by default, so we provide a suitable
+// wrapper.
+if (typeof XMLHttpRequest == 'undefined') {
+  XMLHttpRequest = function() {
+    try { return new ActiveXObject('Msxml2.XMLHTTP.6.0');} catch (e) { }
+    try { return new ActiveXObject('Msxml2.XMLHTTP.3.0');} catch (e) { }
+    try { return new ActiveXObject('Msxml2.XMLHTTP');    } catch (e) { }
+    try { return new ActiveXObject('Microsoft.XMLHTTP'); } catch (e) { }
+    throw new Error('');
+  };
+}
+
+function extend(subClass, baseClass) {
+  function inheritance() { }
+  inheritance.prototype          = baseClass.prototype;
+  subClass.prototype             = new inheritance();
+  subClass.prototype.constructor = subClass;
+  subClass.prototype.superClass  = baseClass.prototype;
+};
+
+function ShellInABox(url, container) {
+  if (url == undefined) {
+    this.rooturl    = document.location.href;
+    this.url        = document.location.href.replace(/[?#].*/, '');
+  } else {
+    this.rooturl    = url;
+    this.url        = url;
+  }
+  if (document.location.hash != '') {
+    var hash        = decodeURIComponent(document.location.hash).
+                      replace(/^#/, '');
+    this.nextUrl    = hash.replace(/,.*/, '');
+    this.session    = hash.replace(/[^,]*,/, '');
+  } else {
+    this.nextUrl    = this.url;
+    this.session    = null;
+  }
+  this.pendingKeys  = '';
+  this.keysInFlight = false;
+  this.connected    = false;
+  this.superClass.constructor.call(this, container);
+
+  // We have to initiate the first XMLHttpRequest from a timer. Otherwise,
+  // Chrome never realizes that the page has loaded.
+  setTimeout(function(shellInABox) {
+               return function() {
+                 shellInABox.sendRequest();
+               };
+             }(this), 1);
+};
+extend(ShellInABox, VT100);
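+// For illustration: a location hash like '#https://host:8001/,abc123' (set
+// when a CGI-launched shellinaboxd redirects here; see reconnect() below)
+// parses into nextUrl = 'https://host:8001/' and session = 'abc123'. With
+// no hash, nextUrl falls back to the page URL and the session starts null.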
+
+ShellInABox.prototype.sessionClosed = function() {
+  try {
+    this.connected    = false;
+    if (this.session) {
+      this.session    = undefined;
+      if (this.cursorX > 0) {
+        this.vt100('\r\n');
+      }
+      this.vt100('Session closed.');
+    }
+    // Revealing the "reconnect" button is commented out until we hook
+    // up the username+token auto-login mechanism to the new session:
+    //this.showReconnect(true);
+  } catch (e) {
+  }
+};
+
+ShellInABox.prototype.reconnect = function() {
+  this.showReconnect(false);
+  if (!this.session) {
+    if (document.location.hash != '') {
+      // A shellinaboxd daemon launched from a CGI only allows a single
+      // session. In order to reconnect, we must reload the frame definition
+      // and obtain a new port number. As this is a different origin, we
+      // need to get enclosing page to help us.
+      parent.location        = this.nextUrl;
+    } else {
+      if (this.url != this.nextUrl) {
+        document.location.replace(this.nextUrl);
+      } else {
+        this.pendingKeys     = '';
+        this.keysInFlight    = false;
+        this.reset(true);
+        this.sendRequest();
+      }
+    }
+  }
+  return false;
+};
+
+ShellInABox.prototype.sendRequest = function(request) {
+  if (request == undefined) {
+    request                  = new XMLHttpRequest();
+  }
+  request.open('POST', this.url + '?', true);
+  request.setRequestHeader('Cache-Control', 'no-cache');
+  request.setRequestHeader('Content-Type',
+                           'application/x-www-form-urlencoded; charset=utf-8');
+  var content                = 'width=' + this.terminalWidth +
+                               '&height=' + this.terminalHeight +
+                               (this.session ? '&session=' +
+                                encodeURIComponent(this.session) : '&rooturl='+
+                                encodeURIComponent(this.rooturl));
+
+  request.onreadystatechange = function(shellInABox) {
+    return function() {
+             try {
+               return shellInABox.onReadyStateChange(request);
+             } catch (e) {
+               shellInABox.sessionClosed();
+             }
+           }
+    }(this);
+  ShellInABox.lastRequestSent = Date.now();
+  request.send(content);
+};
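+// For illustration: before a session exists, an 80x24 terminal sends a body
+// like 'width=80&height=24&rooturl=<url-encoded page URL>'; once a session
+// id has been issued, subsequent polls send
+// 'width=80&height=24&session=<session id>' instead.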
+
+ShellInABox.prototype.onReadyStateChange = function(request) {
+  if (request.readyState == 4 /* XHR_LOADED */) {
+    if (request.status == 200) {
+      this.connected = true;
+      var response   = eval('(' + request.responseText + ')');
+      if (response.data) {
+        this.vt100(response.data);
+      }
+
+      if (!response.session ||
+          this.session && this.session != response.session) {
+        this.sessionClosed();
+      } else {
+        this.session = response.session;
+        this.sendRequest(request);
+      }
+    } else if (request.status == 0) {
+        if (ShellInABox.lastRequestSent + 2000 < Date.now()) {
+            // Timeout, try again
+            this.sendRequest(request);
+        } else {
+            this.vt100('\r\n\r\nRequest failed.');
+            this.sessionClosed();
+        }
+    } else {
+      this.sessionClosed();
+    }
+  }
+};
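+// For illustration: a successful poll returns a JSON-style payload such as
+// {"session":"abc123","data":"login: "} (values hypothetical); "data" is
+// written to the terminal, and a missing or changed "session" means the
+// server considers the session closed.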
+
+ShellInABox.prototype.sendKeys = function(keys) {
+  if (!this.connected) {
+    return;
+  }
+  if (this.keysInFlight || this.session == undefined) {
+    this.pendingKeys          += keys;
+  } else {
+    this.keysInFlight          = true;
+    keys                       = this.pendingKeys + keys;
+    this.pendingKeys           = '';
+    var request                = new XMLHttpRequest();
+    request.open('POST', this.url + '?', true);
+    request.setRequestHeader('Cache-Control', 'no-cache');
+    request.setRequestHeader('Content-Type',
+                           'application/x-www-form-urlencoded; charset=utf-8');
+    var content                = 'width=' + this.terminalWidth +
+                                 '&height=' + this.terminalHeight +
+                                 '&session=' +encodeURIComponent(this.session)+
+                                 '&keys=' + encodeURIComponent(keys);
+    request.onreadystatechange = function(shellInABox) {
+      return function() {
+               try {
+                 return shellInABox.keyPressReadyStateChange(request);
+               } catch (e) {
+               }
+             }
+      }(this);
+    request.send(content);
+  }
+};
+
+ShellInABox.prototype.keyPressReadyStateChange = function(request) {
+  if (request.readyState == 4 /* XHR_LOADED */) {
+    this.keysInFlight = false;
+    if (this.pendingKeys) {
+      this.sendKeys('');
+    }
+  }
+};
+
+ShellInABox.prototype.keysPressed = function(ch) {
+  var hex = '0123456789ABCDEF';
+  var s   = '';
+  for (var i = 0; i < ch.length; i++) {
+    var c = ch.charCodeAt(i);
+    if (c < 128) {
+      s += hex.charAt(c >> 4) + hex.charAt(c & 0xF);
+    } else if (c < 0x800) {
+      s += hex.charAt(0xC +  (c >> 10)       ) +
+           hex.charAt(       (c >>  6) & 0xF ) +
+           hex.charAt(0x8 + ((c >>  4) & 0x3)) +
+           hex.charAt(        c        & 0xF );
+    } else if (c < 0x10000) {
+      s += 'E'                                 +
+           hex.charAt(       (c >> 12)       ) +
+           hex.charAt(0x8 + ((c >> 10) & 0x3)) +
+           hex.charAt(       (c >>  6) & 0xF ) +
+           hex.charAt(0x8 + ((c >>  4) & 0x3)) +
+           hex.charAt(        c        & 0xF );
+    } else if (c < 0x110000) {
+      s += 'F'                                 +
+           hex.charAt(       (c >> 18)       ) +
+           hex.charAt(0x8 + ((c >> 16) & 0x3)) +
+           hex.charAt(       (c >> 12) & 0xF ) +
+           hex.charAt(0x8 + ((c >> 10) & 0x3)) +
+           hex.charAt(       (c >>  6) & 0xF ) +
+           hex.charAt(0x8 + ((c >>  4) & 0x3)) +
+           hex.charAt(        c        & 0xF );
+    }
+  }
+  this.sendKeys(s);
+};
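+// Worked example of the encoding above: keysPressed('Aé') sends '41C3A9'.
+// 'A' (U+0041) is the single byte 0x41; 'é' (U+00E9) is the two UTF-8
+// bytes 0xC3 0xA9; each byte becomes two hex digits.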
+
+ShellInABox.prototype.resized = function(w, h) {
+  // Do not send a resize request until we are fully initialized.
+  if (this.session) {
+    // sendKeys() always transmits the current terminal size. So, flush all
+    // pending keys.
+    this.sendKeys('');
+  }
+};
+
+ShellInABox.prototype.toggleSSL = function() {
+  if (document.location.hash != '') {
+    if (this.nextUrl.match(/\?plain$/)) {
+      this.nextUrl    = this.nextUrl.replace(/\?plain$/, '');
+    } else {
+      this.nextUrl    = this.nextUrl.replace(/[?#].*/, '') + '?plain';
+    }
+    if (!this.session) {
+      parent.location = this.nextUrl;
+    }
+  } else {
+    this.nextUrl      = this.nextUrl.match(/^https:/)
+           ? this.nextUrl.replace(/^https:/, 'http:').replace(/\/*$/, '/plain')
+           : this.nextUrl.replace(/^http/, 'https').replace(/\/*plain$/, '');
+  }
+  if (this.nextUrl.match(/^[:]*:\/\/[^/]*$/)) {
+    this.nextUrl     += '/';
+  }
+  if (this.session && this.nextUrl != this.url) {
+    alert('This change will take effect the next time you login.');
+  }
+};
+
+ShellInABox.prototype.extendContextMenu = function(entries, actions) {
+  // Modify the entries and actions in place, adding any locally defined
+  // menu entries.
+  var oldActions            = [ ];
+  for (var i = 0; i < actions.length; i++) {
+    oldActions[i]           = actions[i];
+  }
+  for (var node = entries.firstChild, i = 0, j = 0; node;
+       node = node.nextSibling) {
+    if (node.tagName == 'LI') {
+      actions[i++]          = oldActions[j++];
+      if (node.id == "endconfig") {
+        node.id             = '';
+        if (typeof serverSupportsSSL != 'undefined' && serverSupportsSSL &&
+            !(typeof disableSSLMenu != 'undefined' && disableSSLMenu)) {
+          // If the server supports both SSL and plain text connections,
+          // provide a menu entry to switch between the two.
+          var newNode       = document.createElement('li');
+          var isSecure;
+          if (document.location.hash != '') {
+            isSecure        = !this.nextUrl.match(/\?plain$/);
+          } else {
+            isSecure        =  this.nextUrl.match(/^https:/);
+          }
+          newNode.innerHTML = (isSecure ? '&#10004; ' : '') + 'Secure';
+          if (node.nextSibling) {
+            entries.insertBefore(newNode, node.nextSibling);
+          } else {
+            entries.appendChild(newNode);
+          }
+          actions[i++]      = this.toggleSSL;
+          node              = newNode;
+        }
+        node.id             = 'endconfig';
+      }
+    }
+  }
+  
+};
+
+ShellInABox.prototype.about = function() {
+  alert("Shell In A Box version " + "2.10 (revision 239)" +
+        "\nCopyright 2008-2010 by Markus Gutschke\n" +
+        "For more information check http://shellinabox.com" +
+        (typeof serverSupportsSSL != 'undefined' && serverSupportsSSL ?
+         "\n\n" +
+         "This product includes software developed by the OpenSSL Project\n" +
+         "for use in the OpenSSL Toolkit. (http://www.openssl.org/)\n" +
+         "\n" +
+         "This product includes cryptographic software written by " +
+         "Eric Young\n(eay@cryptsoft.com)" :
+         ""));
+};
+
+
+// VT100.js -- JavaScript based terminal emulator
+// Copyright (C) 2008-2010 Markus Gutschke <markus@shellinabox.com>
+//
+// This program is free software; you can redistribute it and/or modify
+// it under the terms of the GNU General Public License version 2 as
+// published by the Free Software Foundation.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License along
+// with this program; if not, write to the Free Software Foundation, Inc.,
+// 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+//
+// In addition to these license terms, the author grants the following
+// additional rights:
+//
+// If you modify this program, or any covered work, by linking or
+// combining it with the OpenSSL project's OpenSSL library (or a
+// modified version of that library), containing parts covered by the
+// terms of the OpenSSL or SSLeay licenses, the author
+// grants you additional permission to convey the resulting work.
+// Corresponding Source for a non-source form of such a combination
+// shall include the source code for the parts of OpenSSL used as well
+// as that of the covered work.
+//
+// You may at your option choose to remove this additional permission from
+// the work, or from any part of it.
+//
+// It is possible to build this program in a way that it loads OpenSSL
+// libraries at run-time. If doing so, the following notices are required
+// by the OpenSSL and SSLeay licenses:
+//
+// This product includes software developed by the OpenSSL Project
+// for use in the OpenSSL Toolkit. (http://www.openssl.org/)
+//
+// This product includes cryptographic software written by Eric Young
+// (eay@cryptsoft.com)
+//
+//
+// The most up-to-date version of this program is always available from
+// http://shellinabox.com
+//
+//
+// Notes:
+//
+// The author believes that for the purposes of this license, you meet the
+// requirements for publishing the source code, if your web server publishes
+// the source in unmodified form (i.e. with licensing information, comments,
+// formatting, and identifier names intact). If there are technical reasons
+// that require you to make changes to the source code when serving the
+// JavaScript (e.g. to remove pre-processor directives from the source), these
+// changes should be done in a reversible fashion.
+//
+// The author does not consider websites that reference this script in
+// unmodified form, and web servers that serve this script in unmodified form
+// to be derived works. As such, they are believed to be outside of the
+// scope of this license and not subject to the rights or restrictions of the
+// GNU General Public License.
+//
+// If in doubt, consult a legal professional familiar with the laws that
+// apply in your country.
+
+// #define ESnormal        0
+// #define ESesc           1
+// #define ESsquare        2
+// #define ESgetpars       3
+// #define ESgotpars       4
+// #define ESdeviceattr    5
+// #define ESfunckey       6
+// #define EShash          7
+// #define ESsetG0         8
+// #define ESsetG1         9
+// #define ESsetG2        10
+// #define ESsetG3        11
+// #define ESbang         12
+// #define ESpercent      13
+// #define ESignore       14
+// #define ESnonstd       15
+// #define ESpalette      16
+// #define EStitle        17
+// #define ESss2          18
+// #define ESss3          19
+
+// #define ATTR_DEFAULT   0x00F0
+// #define ATTR_REVERSE   0x0100
+// #define ATTR_UNDERLINE 0x0200
+// #define ATTR_DIM       0x0400
+// #define ATTR_BRIGHT    0x0800
+// #define ATTR_BLINK     0x1000
+
+// #define MOUSE_DOWN     0
+// #define MOUSE_UP       1
+// #define MOUSE_CLICK    2
+
+function VT100(container) {
+  if (typeof linkifyURLs == 'undefined' || linkifyURLs <= 0) {
+    this.urlRE            = null;
+  } else {
+    this.urlRE            = new RegExp(
+    // Known URL protocol are "http", "https", and "ftp".
+    '(?:http|https|ftp)://' +
+
+    // Optionally allow username and passwords.
+    '(?:[^:@/ \u00A0]*(?::[^@/ \u00A0]*)?@)?' +
+
+    // Hostname.
+    '(?:[1-9][0-9]{0,2}(?:[.][1-9][0-9]{0,2}){3}|' +
+    '[0-9a-fA-F]{0,4}(?::{1,2}[0-9a-fA-F]{1,4})+|' +
+    '(?!-)[^[!"#$%&\'()*+,/:;<=>?@\\^_`{|}~\u0000- \u007F-\u00A0]+)' +
+
+    // Port
+    '(?::[1-9][0-9]*)?' +
+
+    // Path.
+    '(?:/(?:(?![/ \u00A0]|[,.)}"\u0027!]+[ \u00A0]|[,.)}"\u0027!]+$).)*)*|' +
+
+    (linkifyURLs <= 1 ? '' :
+    // Also support URLs without a protocol (assume "http").
+    // Optional username and password.
+    '(?:[^:@/ \u00A0]*(?::[^@/ \u00A0]*)?@)?' +
+
+    // Hostnames must end with a well-known top-level domain or must be
+    // numeric.
+    '(?:[1-9][0-9]{0,2}(?:[.][1-9][0-9]{0,2}){3}|' +
+    'localhost|' +
+    '(?:(?!-)' +
+        '[^.[!"#$%&\'()*+,/:;<=>?@\\^_`{|}~\u0000- \u007F-\u00A0]+[.]){2,}' +
+    '(?:(?:com|net|org|edu|gov|aero|asia|biz|cat|coop|info|int|jobs|mil|mobi|'+
+    'museum|name|pro|tel|travel|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|' +
+    'au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|' +
+    'ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cx|cy|cz|de|dj|dk|dm|do|' +
+    'dz|ec|ee|eg|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|' +
+    'gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|' +
+    'ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|' +
+    'lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|' +
+    'mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|' +
+    'pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|' +
+    'sj|sk|sl|sm|sn|so|sr|st|su|sv|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|' +
+    'tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|' +
+    'yu|za|zm|zw|arpa)(?![a-zA-Z0-9])|[Xx][Nn]--[-a-zA-Z0-9]+))' +
+
+    // Port
+    '(?::[1-9][0-9]{0,4})?' +
+
+    // Path.
+    '(?:/(?:(?![/ \u00A0]|[,.)}"\u0027!]+[ \u00A0]|[,.)}"\u0027!]+$).)*)*|') +
+
+    // In addition, support e-mail address. Optionally, recognize "mailto:"
+    '(?:mailto:)' + (linkifyURLs <= 1 ? '' : '?') +
+
+    // Username:
+    '[-_.+a-zA-Z0-9]+@' +
+
+    // Hostname.
+    '(?!-)[-a-zA-Z0-9]+(?:[.](?!-)[-a-zA-Z0-9]+)?[.]' +
+    '(?:(?:com|net|org|edu|gov|aero|asia|biz|cat|coop|info|int|jobs|mil|mobi|'+
+    'museum|name|pro|tel|travel|ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|' +
+    'au|aw|ax|az|ba|bb|bd|be|bf|bg|bh|bi|bj|bm|bn|bo|br|bs|bt|bv|bw|by|bz|' +
+    'ca|cc|cd|cf|cg|ch|ci|ck|cl|cm|cn|co|cr|cu|cv|cx|cy|cz|de|dj|dk|dm|do|' +
+    'dz|ec|ee|eg|er|es|et|eu|fi|fj|fk|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|' +
+    'gm|gn|gp|gq|gr|gs|gt|gu|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|' +
+    'ir|is|it|je|jm|jo|jp|ke|kg|kh|ki|km|kn|kp|kr|kw|ky|kz|la|lb|lc|li|lk|' +
+    'lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mm|mn|mo|mp|mq|mr|ms|mt|mu|' +
+    'mv|mw|mx|my|mz|na|nc|ne|nf|ng|ni|nl|no|np|nr|nu|nz|om|pa|pe|pf|pg|ph|' +
+    'pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|' +
+    'sj|sk|sl|sm|sn|so|sr|st|su|sv|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|' +
+    'tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|ye|yt|' +
+    'yu|za|zm|zw|arpa)(?![a-zA-Z0-9])|[Xx][Nn]--[-a-zA-Z0-9]+)' +
+
+    // Optional arguments
+    '(?:[?](?:(?![ \u00A0]|[,.)}"\u0027!]+[ \u00A0]|[,.)}"\u0027!]+$).)*)?');
+  }
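+  // For illustration: the pattern above matches absolute URLs such as
+  // 'http://example.com/path'; with linkifyURLs > 1 also bare hostnames
+  // with a well-known TLD, such as 'example.com/path'; and e-mail
+  // addresses, with the 'mailto:' prefix required unless linkifyURLs > 1.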
+  this.getUserSettings();
+  this.initializeElements(container);
+  this.maxScrollbackLines = 500;
+  this.npar               = 0;
+  this.par                = [ ];
+  this.isQuestionMark     = false;
+  this.savedX             = [ ];
+  this.savedY             = [ ];
+  this.savedAttr          = [ ];
+  this.savedUseGMap       = 0;
+  this.savedGMap          = [ this.Latin1Map, this.VT100GraphicsMap,
+                              this.CodePage437Map, this.DirectToFontMap ];
+  this.savedValid         = [ ];
+  this.respondString      = '';
+  this.titleString        = '';
+  this.internalClipboard  = undefined;
+  this.reset(true);
+}
+
+VT100.prototype.reset = function(clearHistory) {
+  this.isEsc                                         = 0 /* ESnormal */;
+  this.needWrap                                      = false;
+  this.autoWrapMode                                  = true;
+  this.dispCtrl                                      = false;
+  this.toggleMeta                                    = false;
+  this.insertMode                                    = false;
+  this.applKeyMode                                   = false;
+  this.cursorKeyMode                                 = false;
+  this.crLfMode                                      = false;
+  this.offsetMode                                    = false;
+  this.mouseReporting                                = false;
+  this.printing                                      = false;
+  if (typeof this.printWin != 'undefined' &&
+      this.printWin && !this.printWin.closed) {
+    this.printWin.close();
+  }
+  this.printWin                                      = null;
+  this.utfEnabled                                    = this.utfPreferred;
+  this.utfCount                                      = 0;
+  this.utfChar                                       = 0;
+  this.color                                         = 'ansi0 bgAnsi15';
+  this.style                                         = '';
+  this.attr                                          = 0x00F0 /* ATTR_DEFAULT */;
+  this.useGMap                                       = 0;
+  this.GMap                                          = [ this.Latin1Map,
+                                                         this.VT100GraphicsMap,
+                                                         this.CodePage437Map,
+                                                         this.DirectToFontMap];
+  this.translate                                     = this.GMap[this.useGMap];
+  this.top                                           = 0;
+  this.bottom                                        = this.terminalHeight;
+  this.lastCharacter                                 = ' ';
+  this.userTabStop                                   = [ ];
+
+  if (clearHistory) {
+    for (var i = 0; i < 2; i++) {
+      while (this.console[i].firstChild) {
+        this.console[i].removeChild(this.console[i].firstChild);
+      }
+    }
+  }
+
+  this.enableAlternateScreen(false);
+
+  var wasCompressed                                  = false;
+  var transform                                      = this.getTransformName();
+  if (transform) {
+    for (var i = 0; i < 2; ++i) {
+      wasCompressed                  |= this.console[i].style[transform] != '';
+      this.console[i].style[transform]               = '';
+    }
+    this.cursor.style[transform]                     = '';
+    this.space.style[transform]                      = '';
+    if (transform == 'filter') {
+      this.console[this.currentScreen].style.width   = '';
+    }
+  }
+  this.scale                                         = 1.0;
+  if (wasCompressed) {
+    this.resizer();
+  }
+
+  this.gotoXY(0, 0);
+  this.showCursor();
+  this.isInverted                                    = false;
+  this.refreshInvertedState();
+  this.clearRegion(0, 0, this.terminalWidth, this.terminalHeight,
+                   this.color, this.style);
+};
+
+VT100.prototype.addListener = function(elem, event, listener) {
+  try {
+    if (elem.addEventListener) {
+      elem.addEventListener(event, listener, false);
+    } else {
+      elem.attachEvent('on' + event, listener);
+    }
+  } catch (e) {
+  }
+};
+
+VT100.prototype.getUserSettings = function() {
+  // Compute hash signature to identify the entries in the userCSS menu.
+  // If the menu is unchanged from last time, default values can be
+  // looked up in a cookie associated with this page.
+  this.signature            = 3;
+  this.utfPreferred         = true;
+  this.visualBell           = typeof suppressAllAudio != 'undefined' &&
+                              suppressAllAudio;
+  this.autoprint            = true;
+  this.softKeyboard         = false;
+  this.blinkingCursor       = true;
+  if (this.visualBell) {
+    this.signature          = Math.floor(16807*this.signature + 1) %
+                                         ((1 << 31) - 1);
+  }
+  if (typeof userCSSList != 'undefined') {
+    for (var i = 0; i < userCSSList.length; ++i) {
+      var label             = userCSSList[i][0];
+      for (var j = 0; j < label.length; ++j) {
+        this.signature      = Math.floor(16807*this.signature+
+                                         label.charCodeAt(j)) %
+                                         ((1 << 31) - 1);
+      }
+      if (userCSSList[i][1]) {
+        this.signature      = Math.floor(16807*this.signature + 1) %
+                                         ((1 << 31) - 1);
+      }
+    }
+  }
+
+  var key                   = 'shellInABox=' + this.signature + ':';
+  var settings              = document.cookie.indexOf(key);
+  if (settings >= 0) {
+    settings                = document.cookie.substr(settings + key.length).
+                                                   replace(/([0-1]*).*/, "$1");
+    if (settings.length == 5 + (typeof userCSSList == 'undefined' ?
+                                0 : userCSSList.length)) {
+      this.utfPreferred     = settings.charAt(0) != '0';
+      this.visualBell       = settings.charAt(1) != '0';
+      this.autoprint        = settings.charAt(2) != '0';
+      this.softKeyboard     = settings.charAt(3) != '0';
+      this.blinkingCursor   = settings.charAt(4) != '0';
+      if (typeof userCSSList != 'undefined') {
+        for (var i = 0; i < userCSSList.length; ++i) {
+          userCSSList[i][2] = settings.charAt(i + 5) != '0';
+        }
+      }
+    }
+  }
+  this.utfEnabled           = this.utfPreferred;
+};
+
+VT100.prototype.storeUserSettings = function() {
+  var settings  = 'shellInABox=' + this.signature + ':' +
+                  (this.utfEnabled     ? '1' : '0') +
+                  (this.visualBell     ? '1' : '0') +
+                  (this.autoprint      ? '1' : '0') +
+                  (this.softKeyboard   ? '1' : '0') +
+                  (this.blinkingCursor ? '1' : '0');
+  if (typeof userCSSList != 'undefined') {
+    for (var i = 0; i < userCSSList.length; ++i) {
+      settings += userCSSList[i][2] ? '1' : '0';
+    }
+  }
+  var d         = new Date();
+  d.setDate(d.getDate() + 3653);
+  document.cookie = settings + ';expires=' + d.toGMTString();
+};
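+// For illustration: a stored cookie looks like 'shellInABox=<signature>:11010'
+// (example bits), where the five digits record, in order: UTF-8 preferred,
+// visual bell, autoprint, soft keyboard, blinking cursor; one more digit
+// follows per userCSSList entry, if any.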
+
+VT100.prototype.initializeUserCSSStyles = function() {
+  this.usercssActions                    = [];
+  if (typeof userCSSList != 'undefined') {
+    var menu                             = '';
+    var group                            = '';
+    var wasSingleSel                     = 1;
+    var beginOfGroup                     = 0;
+    for (var i = 0; i <= userCSSList.length; ++i) {
+      if (i < userCSSList.length) {
+        var label                        = userCSSList[i][0];
+        var newGroup                     = userCSSList[i][1];
+        var enabled                      = userCSSList[i][2];
+      
+        // Add user style sheet to document
+        var style                        = document.createElement('link');
+        var id                           = document.createAttribute('id');
+        id.nodeValue                     = 'usercss-' + i;
+        style.setAttributeNode(id);
+        var rel                          = document.createAttribute('rel');
+        rel.nodeValue                    = 'stylesheet';
+        style.setAttributeNode(rel);
+        var href                         = document.createAttribute('href');
+        href.nodeValue                   = 'usercss-' + i + '.css';
+        style.setAttributeNode(href);
+        var type                         = document.createAttribute('type');
+        type.nodeValue                   = 'text/css';
+        style.setAttributeNode(type);
+        document.getElementsByTagName('head')[0].appendChild(style);
+        style.disabled                   = !enabled;
+      }
+    
+      // Add entry to menu
+      if (newGroup || i == userCSSList.length) {
+        if (beginOfGroup != 0 && (i - beginOfGroup > 1 || !wasSingleSel)) {
+          // The last group had multiple entries that are mutually exclusive,
+          // or the next-to-last group did. In either case, we need to
+          // append a "<hr />" before we can add the last group to the menu.
+          menu                          += '<hr />';
+        }
+        wasSingleSel                     = i - beginOfGroup < 1;
+        menu                            += group;
+        group                            = '';
+
+        for (var j = beginOfGroup; j < i; ++j) {
+          this.usercssActions[this.usercssActions.length] =
+            function(vt100, current, begin, count) {
+
+              // Deselect all other entries in the group, then either select
+              // (for multiple entries in group) or toggle (for on/off entry)
+              // the current entry.
+              return function() {
+                var entry                = vt100.getChildById(vt100.menu,
+                                                              'beginusercss');
+                var i                    = -1;
+                var j                    = -1;
+                for (var c = count; c > 0; ++j) {
+                  if (entry.tagName == 'LI') {
+                    if (++i >= begin) {
+                      --c;
+                      var label          = vt100.usercss.childNodes[j];
+
+                      // Restore label to just the text content
+                      if (typeof label.textContent == 'undefined') {
+                        var s            = label.innerText;
+                        label.innerHTML  = '';
+                        label.appendChild(document.createTextNode(s));
+                      } else {
+                        label.textContent= label.textContent;
+                      }
+
+                      // User style sheets are numbered sequentially
+                      var sheet          = document.getElementById(
+                                                               'usercss-' + i);
+                      if (i == current) {
+                        if (count == 1) {
+                          sheet.disabled = !sheet.disabled;
+                        } else {
+                          sheet.disabled = false;
+                        }
+                        if (!sheet.disabled) {
+                          label.innerHTML= '<img src="/webshell/enabled.gif" />' +
+                                           label.innerHTML;
+                        }
+                      } else {
+                        sheet.disabled   = true;
+                      }
+                      userCSSList[i][2]  = !sheet.disabled;
+                    }
+                  }
+                  entry                  = entry.nextSibling;
+                }
+
+                // If the font size changed, adjust cursor and line dimensions
+                this.cursor.style.cssText= '';
+                this.cursorWidth         = this.cursor.clientWidth;
+                this.cursorHeight        = this.lineheight.clientHeight;
+                for (i = 0; i < this.console.length; ++i) {
+                  for (var line = this.console[i].firstChild; line;
+                       line = line.nextSibling) {
+                    line.style.height    = this.cursorHeight + 'px';
+                  }
+                }
+                vt100.resizer();
+              };
+            }(this, j, beginOfGroup, i - beginOfGroup);
+        }
+
+        if (i == userCSSList.length) {
+          break;
+        }
+
+        beginOfGroup                     = i;
+      }
+      // Collect all entries in a group, before attaching them to the menu.
+      // This is necessary as we don't know whether this is a group of
+      // mutually exclusive options (which should be separated by "<hr />" on
+  // both ends), or whether this is an on/off toggle, which can be grouped
+      // together with other on/off options.
+      group                             +=
+        '<li>' + (enabled ? '<img src="/webshell/enabled.gif" />' : '') +
+                 label +
+        '</li>';
+    }
+    this.usercss.innerHTML               = menu;
+  }
+};
+
+VT100.prototype.resetLastSelectedKey = function(e) {
+  var key                          = this.lastSelectedKey;
+  if (!key) {
+    return false;
+  }
+
+  var position                     = this.mousePosition(e);
+
+  // We don't get all the necessary events to reliably reselect a key
+  // if we moved away from it and then back onto it. We approximate the
+  // behavior by remembering the key until either we release the mouse
+  // button (we might never get this event if the mouse has since left
+  // the window), or until we move away too far.
+  var box                          = this.keyboard.firstChild;
+  if (position[0] <  box.offsetLeft + key.offsetWidth ||
+      position[1] <  box.offsetTop + key.offsetHeight ||
+      position[0] >= box.offsetLeft + box.offsetWidth - key.offsetWidth ||
+      position[1] >= box.offsetTop + box.offsetHeight - key.offsetHeight ||
+      position[0] <  box.offsetLeft + key.offsetLeft - key.offsetWidth ||
+      position[1] <  box.offsetTop + key.offsetTop - key.offsetHeight ||
+      position[0] >= box.offsetLeft + key.offsetLeft + 2*key.offsetWidth ||
+      position[1] >= box.offsetTop + key.offsetTop + 2*key.offsetHeight) {
+    if (this.lastSelectedKey.className) console.log('reset: deselecting');
+    this.lastSelectedKey.className = '';
+    this.lastSelectedKey           = undefined;
+  }
+  return false;
+};
+
+VT100.prototype.showShiftState = function(state) {
+  var style              = document.getElementById('shift_state');
+  if (state) {
+    this.setTextContentRaw(style,
+                           '#vt100 #keyboard .shifted {' +
+                             'display: inline }' +
+                           '#vt100 #keyboard .unshifted {' +
+                             'display: none }');
+  } else {
+    this.setTextContentRaw(style, '');
+  }
+  var elems              = this.keyboard.getElementsByTagName('I');
+  for (var i = 0; i < elems.length; ++i) {
+    if (elems[i].id == '16') {
+      elems[i].className = state ? 'selected' : '';
+    }
+  }
+};
+
+VT100.prototype.showCtrlState = function(state) {
+  var ctrl         = this.getChildById(this.keyboard, '17' /* Ctrl */);
+  if (ctrl) {
+    ctrl.className = state ? 'selected' : '';
+  }
+};
+
+VT100.prototype.showAltState = function(state) {
+  var alt         = this.getChildById(this.keyboard, '18' /* Alt */);
+  if (alt) {
+    alt.className = state ? 'selected' : '';
+  }
+};
+
+VT100.prototype.clickedKeyboard = function(e, elem, ch, key, shift, ctrl, alt){
+  var fake      = [ ];
+  fake.charCode = ch;
+  fake.keyCode  = key;
+  fake.ctrlKey  = ctrl;
+  fake.shiftKey = shift;
+  fake.altKey   = alt;
+  fake.metaKey  = alt;
+  return this.handleKey(fake);
+};
+
+VT100.prototype.addKeyBinding = function(elem, ch, key, CH, KEY) {
+  if (elem == undefined) {
+    return;
+  }
+  if (ch == '\u00A0') {
+    // &nbsp; should be treated as a regular space character.
+    ch                                  = ' ';
+  }
+  if (ch != undefined && CH == undefined) {
+    // For letter keys, we automatically compute the uppercase character code
+    // from the lowercase one.
+    CH                                  = ch.toUpperCase();
+  }
+  if (KEY == undefined && key != undefined) {
+    // Most keys have identical key codes for both lowercase and uppercase
+    // keypresses. Normally, only function keys would have distinct key codes,
+    // whereas regular keys have character codes.
+    KEY                                 = key;
+  } else if (KEY == undefined && CH != undefined) {
+    // For regular keys, copy the character code to the key code.
+    KEY                                 = CH.charCodeAt(0);
+  }
+  if (key == undefined && ch != undefined) {
+    // For regular keys, copy the character code to the key code.
+    key                                 = ch.charCodeAt(0);
+  }
+  // Convert characters to numeric character codes. If the character code
+  // is undefined (i.e. this is a function key), set it to zero.
+  ch                                    = ch ? ch.charCodeAt(0) : 0;
+  CH                                    = CH ? CH.charCodeAt(0) : 0;
+
+  // Mouse down events highlight the key. We also set lastSelectedKey. This
+  // is needed so that mouseout/mouseover can keep track of the key that
+  // is currently being clicked.
+  this.addListener(elem, 'mousedown',
+    function(vt100, elem, key) { return function(e) {
+      if ((e.which || e.button) == 1) {
+        if (vt100.lastSelectedKey) {       
+          vt100.lastSelectedKey.className= '';
+        }
+        // Highlight the key while the mouse button is held down.
+        if (key == 16 /* Shift */) {
+          if (!elem.className != vt100.isShift) {
+            vt100.showShiftState(!vt100.isShift);
+          }
+        } else if (key == 17 /* Ctrl */) {
+          if (!elem.className != vt100.isCtrl) {
+            vt100.showCtrlState(!vt100.isCtrl);
+          }
+        } else if (key == 18 /* Alt */) {
+          if (!elem.className != vt100.isAlt) {
+            vt100.showAltState(!vt100.isAlt);
+          }
+        } else {
+          elem.className                  = 'selected';
+        }
+        vt100.lastSelectedKey             = elem;
+      }
+      return false; }; }(this, elem, key));
+  var clicked                           =
+    // Modifier keys update the state of the keyboard, but do not generate
+    // any key clicks that get forwarded to the application.
+    key >= 16 /* Shift */ && key <= 18 /* Alt */ ?
+    function(vt100, elem) { return function(e) {
+      if (elem == vt100.lastSelectedKey) {
+        if (key == 16 /* Shift */) {
+          // The user clicked the Shift key
+          vt100.isShift                 = !vt100.isShift;
+          vt100.showShiftState(vt100.isShift);
+        } else if (key == 17 /* Ctrl */) {
+          vt100.isCtrl                  = !vt100.isCtrl;
+          vt100.showCtrlState(vt100.isCtrl);
+        } else if (key == 18 /* Alt */) {
+          vt100.isAlt                   = !vt100.isAlt;
+          vt100.showAltState(vt100.isAlt);
+        }
+        vt100.lastSelectedKey           = undefined;
+      }
+      if (vt100.lastSelectedKey) {
+        vt100.lastSelectedKey.className = '';
+        vt100.lastSelectedKey           = undefined;
+      }
+      return false; }; }(this, elem) :
+    // Regular keys generate key clicks, when the mouse button is released or
+    // when a mouse click event is received.
+    function(vt100, elem, ch, key, CH, KEY) { return function(e) {
+      if (vt100.lastSelectedKey) {
+        if (elem == vt100.lastSelectedKey) {
+          // The user clicked a key.
+          if (vt100.isShift) {
+            vt100.clickedKeyboard(e, elem, CH, KEY,
+                                  true, vt100.isCtrl, vt100.isAlt);
+          } else {
+            vt100.clickedKeyboard(e, elem, ch, key,
+                                  false, vt100.isCtrl, vt100.isAlt);
+          }
+          vt100.isShift                 = false;
+          vt100.showShiftState(false);
+          vt100.isCtrl                  = false;
+          vt100.showCtrlState(false);
+          vt100.isAlt                   = false;
+          vt100.showAltState(false);
+        }
+        vt100.lastSelectedKey.className = '';
+        vt100.lastSelectedKey           = undefined;
+      }
+      elem.className                    = '';
+      return false; }; }(this, elem, ch, key, CH, KEY);
+  this.addListener(elem, 'mouseup', clicked);
+  this.addListener(elem, 'click', clicked);
+
+  // When moving the mouse away from a key, check if any keys need to be
+  // deselected.
+  this.addListener(elem, 'mouseout',
+    function(vt100, elem, key) { return function(e) {
+      if (key == 16 /* Shift */) {
+        if (!elem.className == vt100.isShift) {
+          vt100.showShiftState(vt100.isShift);
+        }
+      } else if (key == 17 /* Ctrl */) {
+        if (!elem.className == vt100.isCtrl) {
+          vt100.showCtrlState(vt100.isCtrl);
+        }
+      } else if (key == 18 /* Alt */) {
+        if (!elem.className == vt100.isAlt) {
+          vt100.showAltState(vt100.isAlt);
+        }
+      } else if (elem.className) {
+        elem.className                  = '';
+        vt100.lastSelectedKey           = elem;
+      } else if (vt100.lastSelectedKey) {
+        vt100.resetLastSelectedKey(e);
+      }
+      return false; }; }(this, elem, key));
+
+  // When moving the mouse over a key, select it if the user is still holding
+  // the mouse button down (i.e. elem == lastSelectedKey)
+  this.addListener(elem, 'mouseover',
+    function(vt100, elem, key) { return function(e) {
+      if (elem == vt100.lastSelectedKey) {
+        if (key == 16 /* Shift */) {
+          if (!elem.className != vt100.isShift) {
+            vt100.showShiftState(!vt100.isShift);
+          }
+        } else if (key == 17 /* Ctrl */) {
+          if (!elem.className != vt100.isCtrl) {
+            vt100.showCtrlState(!vt100.isCtrl);
+          }
+        } else if (key == 18 /* Alt */) {
+          if (!elem.className != vt100.isAlt) {
+            vt100.showAltState(!vt100.isAlt);
+          }
+        } else if (!elem.className) {
+          elem.className                = 'selected';
+        }
+      } else {
+        vt100.resetLastSelectedKey(e);
+      }
+      return false; }; }(this, elem, key));
+};
+
+VT100.prototype.initializeKeyBindings = function(elem) {
+  if (elem) {
+    if (elem.nodeName == "I" || elem.nodeName == "B") {
+      if (elem.id) {
+        // Function keys. The Javascript keycode is part of the "id"
+        var i     = parseInt(elem.id);
+        if (i) {
+          // If the id does not parse as a number, it is not a keycode.
+          this.addKeyBinding(elem, undefined, i);
+        }
+      } else {
+        var child = elem.firstChild;
+        if (child) {
+          if (child.nodeName == "#text") {
+            // If the key only has a text node as a child, then it is a letter.
+            // Automatically compute the lower and upper case version of the
+            // key.
+            var text = this.getTextContent(child) ||
+                       this.getTextContent(elem);
+            this.addKeyBinding(elem, text.toLowerCase());
+          } else if (child.nextSibling) {
+            // If the key has two children, they are the lower and upper case
+            // character code, respectively.
+            this.addKeyBinding(elem, this.getTextContent(child), undefined,
+                               this.getTextContent(child.nextSibling));
+          }
+        }
+      }
+    }
+  }
+  // Recursively parse all other child nodes.
+  for (elem = elem.firstChild; elem; elem = elem.nextSibling) {
+    this.initializeKeyBindings(elem);
+  }
+};
+
+VT100.prototype.initializeKeyboardButton = function() {
+  // Configure mouse event handlers for button that displays/hides keyboard
+  this.addListener(this.keyboardImage, 'click',
+    function(vt100) { return function(e) {
+      if (vt100.keyboard.style.display != '') {
+        if (vt100.reconnectBtn.style.visibility != '') {
+          vt100.initializeKeyboard();
+          vt100.showSoftKeyboard();
+        }
+      } else {
+        vt100.hideSoftKeyboard();
+        vt100.input.focus();
+      }
+      return false; }; }(this));
+
+  // Enable button that displays keyboard
+  if (this.softKeyboard) {
+    this.keyboardImage.style.visibility = 'visible';
+  }
+};
+
+VT100.prototype.initializeKeyboard = function() {
+  // Only need to initialize the keyboard the very first time. When doing so,
+  // copy the keyboard layout from the iframe.
+  if (this.keyboard.firstChild) {
+    return;
+  }
+  this.keyboard.innerHTML               =
+                                    this.layout.contentDocument.body.innerHTML;
+  var box                               = this.keyboard.firstChild;
+  this.hideSoftKeyboard();
+
+  // Configure mouse event handlers for on-screen keyboard
+  this.addListener(this.keyboard, 'click',
+    function(vt100) { return function(e) {
+      vt100.hideSoftKeyboard();
+      vt100.input.focus();
+      return false; }; }(this));
+  this.addListener(this.keyboard, 'selectstart', this.cancelEvent);
+  this.addListener(box, 'click', this.cancelEvent);
+  this.addListener(box, 'mouseup',
+    function(vt100) { return function(e) {
+      if (vt100.lastSelectedKey) {
+        vt100.lastSelectedKey.className = '';
+        vt100.lastSelectedKey           = undefined;
+      }
+      return false; }; }(this));
+  this.addListener(box, 'mouseout',
+    function(vt100) { return function(e) {
+      return vt100.resetLastSelectedKey(e); }; }(this));
+  this.addListener(box, 'mouseover',
+    function(vt100) { return function(e) {
+      return vt100.resetLastSelectedKey(e); }; }(this));
+
+  // Configure SHIFT key behavior
+  var style                             = document.createElement('style');
+  var id                                = document.createAttribute('id');
+  id.nodeValue                          = 'shift_state';
+  style.setAttributeNode(id);
+  var type                              = document.createAttribute('type');
+  type.nodeValue                        = 'text/css';
+  style.setAttributeNode(type);
+  document.getElementsByTagName('head')[0].appendChild(style);
+
+  // Set up key bindings
+  this.initializeKeyBindings(box);
+};
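+
+// Note: the empty <style id="shift_state"> element created above is a hook;
+// showShiftState() (defined elsewhere in this file) presumably rewrites its
+// rules to switch which key captions are visible while SHIFT is engaged.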
+
+VT100.prototype.initializeElements = function(container) {
+  // If the necessary objects have not already been defined in the HTML
+  // page, create them now.
+  if (container) {
+    this.container             = container;
+  } else if (!(this.container  = document.getElementById('vt100'))) {
+    this.container             = document.createElement('div');
+    this.container.id          = 'vt100';
+    document.body.appendChild(this.container);
+  }
+
+  if (!this.getChildById(this.container, 'reconnect')   ||
+      !this.getChildById(this.container, 'menu')        ||
+      !this.getChildById(this.container, 'keyboard')    ||
+      !this.getChildById(this.container, 'kbd_button')  ||
+      !this.getChildById(this.container, 'kbd_img')     ||
+      !this.getChildById(this.container, 'layout')      ||
+      !this.getChildById(this.container, 'scrollable')  ||
+      !this.getChildById(this.container, 'console')     ||
+      !this.getChildById(this.container, 'alt_console') ||
+      !this.getChildById(this.container, 'ieprobe')     ||
+      !this.getChildById(this.container, 'padding')     ||
+      !this.getChildById(this.container, 'cursor')      ||
+      !this.getChildById(this.container, 'lineheight')  ||
+      !this.getChildById(this.container, 'usercss')     ||
+      !this.getChildById(this.container, 'space')       ||
+      !this.getChildById(this.container, 'input')       ||
+      !this.getChildById(this.container, 'cliphelper')) {
+    // Only enable the "embed" object, if we have a suitable plugin. Otherwise,
+    // we might get a pointless warning that a suitable plugin is not yet
+    // installed. If in doubt, we'd rather just stay silent.
+    var embed                  = '';
+    try {
+      if (typeof navigator.mimeTypes["audio/x-wav"].enabledPlugin.name !=
+          'undefined') {
+        embed                  = typeof suppressAllAudio != 'undefined' &&
+                                 suppressAllAudio ? "" :
+        '<embed classid="clsid:02BF25D5-8C17-4B23-BC80-D3488ABDDC6B" ' +
+                       'id="beep_embed" ' +
+                       'src="beep.wav" ' +
+                       'autostart="false" ' +
+                       'volume="100" ' +
+                       'enablejavascript="true" ' +
+                       'type="audio/x-wav" ' +
+                       'height="16" ' +
+                       'width="200" ' +
+                       'style="position:absolute;left:-1000px;top:-1000px" />';
+      }
+    } catch (e) {
+    }
+
+    this.container.innerHTML   =
+                       '<div id="reconnect" style="visibility: hidden">' +
+                         '<input type="button" value="Connect" ' +
+                                'onsubmit="return false" />' +
+                       '</div>' +
+                       '<div id="cursize" style="visibility: hidden">' +
+                       '</div>' +
+                       '<div id="menu"></div>' +
+                       '<div id="keyboard" unselectable="on">' +
+                       '</div>' +
+                       '<div id="scrollable">' +
+                         '<table id="kbd_button">' +
+                           '<tr><td width="100%">&nbsp;</td>' +
+                           '<td><img id="kbd_img" src="/webshell/keyboard.png" /></td>' +
+                           '<td>&nbsp;&nbsp;&nbsp;&nbsp;</td></tr>' +
+                         '</table>' +
+                         '<pre id="lineheight">&nbsp;</pre>' +
+                         '<pre id="console">' +
+                           '<pre></pre>' +
+                           '<div id="ieprobe"><span>&nbsp;</span></div>' +
+                         '</pre>' +
+                         '<pre id="alt_console" style="display: none"></pre>' +
+                         '<div id="padding"></div>' +
+                         '<pre id="cursor">&nbsp;</pre>' +
+                       '</div>' +
+                       '<div class="hidden">' +
+                         '<div id="usercss"></div>' +
+                         '<pre><div><span id="space"></span></div></pre>' +
+                         '<input type="textfield" id="input" autocorrect="off" autocapitalize="off" />' +
+                         '<input type="textfield" id="cliphelper" />' +
+                         (typeof suppressAllAudio != 'undefined' &&
+                          suppressAllAudio ? "" :
+                         embed + '<bgsound id="beep_bgsound" loop=1 />') +
+                          '<iframe id="layout" src="/webshell/keyboard.html" />' +
+                        '</div>';
+  }
+
+  // Find the object used for playing the "beep" sound, if any.
+  if (typeof suppressAllAudio != 'undefined' && suppressAllAudio) {
+    this.beeper                = undefined;
+  } else {
+    this.beeper                = this.getChildById(this.container,
+                                                   'beep_embed');
+    if (!this.beeper || !this.beeper.Play) {
+      this.beeper              = this.getChildById(this.container,
+                                                   'beep_bgsound');
+      if (!this.beeper || typeof this.beeper.src == 'undefined') {
+        this.beeper            = undefined;
+      }
+    }
+  }
+
+  // Initialize the variables for finding the text console and the
+  // cursor.
+  this.reconnectBtn            = this.getChildById(this.container,'reconnect');
+  this.curSizeBox              = this.getChildById(this.container, 'cursize');
+  this.menu                    = this.getChildById(this.container, 'menu');
+  this.keyboard                = this.getChildById(this.container, 'keyboard');
+  this.keyboardImage           = this.getChildById(this.container, 'kbd_img');
+  this.layout                  = this.getChildById(this.container, 'layout');
+  this.scrollable              = this.getChildById(this.container,
+                                                                 'scrollable');
+  this.lineheight              = this.getChildById(this.container,
+                                                                 'lineheight');
+  this.console                 =
+                          [ this.getChildById(this.container, 'console'),
+                            this.getChildById(this.container, 'alt_console') ];
+  var ieProbe                  = this.getChildById(this.container, 'ieprobe');
+  this.padding                 = this.getChildById(this.container, 'padding');
+  this.cursor                  = this.getChildById(this.container, 'cursor');
+  this.usercss                 = this.getChildById(this.container, 'usercss');
+  this.space                   = this.getChildById(this.container, 'space');
+  this.input                   = this.getChildById(this.container, 'input');
+  this.cliphelper              = this.getChildById(this.container,
+                                                                 'cliphelper');
+
+  // Add any user selectable style sheets to the menu
+  this.initializeUserCSSStyles();
+
+  // Remember the dimensions of a standard character glyph. We would
+  // expect that we could just check cursor.clientWidth/Height at any time,
+  // but it turns out that browsers sometimes invalidate these values
+  // (e.g. while displaying a print preview screen).
+  this.cursorWidth             = this.cursor.clientWidth;
+  this.cursorHeight            = this.lineheight.clientHeight;
+
+  // IE has a slightly different box model that we need to compensate for.
+  this.isIE                    = ieProbe.offsetTop > 1;
+  ieProbe                      = undefined;
+  this.console.innerHTML       = '';
+
+  // Determine if the terminal window is positioned at the beginning of the
+  // page, or if it is embedded somewhere else in the page. For full-screen
+  // terminals, automatically resize whenever the browser window changes.
+  var marginTop                = parseInt(this.getCurrentComputedStyle(
+                                          document.body, 'marginTop'));
+  var marginLeft               = parseInt(this.getCurrentComputedStyle(
+                                          document.body, 'marginLeft'));
+  var marginRight              = parseInt(this.getCurrentComputedStyle(
+                                          document.body, 'marginRight'));
+  var x                        = this.container.offsetLeft;
+  var y                        = this.container.offsetTop;
+  for (var parent = this.container; parent = parent.offsetParent; ) {
+    x                         += parent.offsetLeft;
+    y                         += parent.offsetTop;
+  }
+  this.isEmbedded              = marginTop != y ||
+                                 marginLeft != x ||
+                                 (window.innerWidth ||
+                                  document.documentElement.clientWidth ||
+                                  document.body.clientWidth) -
+                                 marginRight != x + this.container.offsetWidth;
+  if (!this.isEmbedded) {
+    // Some browsers generate resize events when the terminal is first
+    // shown. Disable showing the size indicator until a little bit after
+    // the terminal has been rendered the first time.
+    this.indicateSize          = false;
+    setTimeout(function(vt100) {
+      return function() {
+        vt100.indicateSize     = true;
+      };
+    }(this), 100);
+    this.addListener(window, 'resize',
+                     function(vt100) {
+                       return function() {
+                         vt100.hideContextMenu();
+                         vt100.resizer();
+                         vt100.showCurrentSize();
+                       };
+                     }(this));
+
+    // Hide extra scrollbars attached to window
+    document.body.style.margin = '0px';
+    try { document.body.style.overflow = 'hidden'; } catch (e) { }
+    try { document.body.oncontextmenu = function() {return false;};} catch(e){}
+  }
+
+  // Set up onscreen soft keyboard
+  this.initializeKeyboardButton();
+
+  // Hide context menu
+  this.hideContextMenu();
+
+  // Add listener to reconnect button
+  this.addListener(this.reconnectBtn.firstChild, 'click',
+                   function(vt100) {
+                     return function() {
+                       var rc = vt100.reconnect();
+                       vt100.input.focus();
+                       return rc;
+                     }
+                   }(this));
+
+  // Add input listeners
+  this.addListener(this.input, 'blur',
+                   function(vt100) {
+                     return function() { vt100.blurCursor(); } }(this));
+  this.addListener(this.input, 'focus',
+                   function(vt100) {
+                     return function() { vt100.focusCursor(); } }(this));
+  this.addListener(this.input, 'keydown',
+                   function(vt100) {
+                     return function(e) {
+                       if (!e) e = window.event;
+                       return vt100.keyDown(e); } }(this));
+  this.addListener(this.input, 'keypress',
+                   function(vt100) {
+                     return function(e) {
+                       if (!e) e = window.event;
+                       return vt100.keyPressed(e); } }(this));
+  this.addListener(this.input, 'keyup',
+                   function(vt100) {
+                     return function(e) {
+                       if (!e) e = window.event;
+                       return vt100.keyUp(e); } }(this));
+
+  // Attach listeners that move the focus to the <input> field. This way we
+  // can make sure that we can receive keyboard input.
+  var mouseEvent               = function(vt100, type) {
+    return function(e) {
+      if (!e) e = window.event;
+      return vt100.mouseEvent(e, type);
+    };
+  };
+  this.addListener(this.scrollable,'mousedown',mouseEvent(this, 0 /* MOUSE_DOWN */));
+  this.addListener(this.scrollable,'mouseup',  mouseEvent(this, 1 /* MOUSE_UP */));
+  this.addListener(this.scrollable,'click',    mouseEvent(this, 2 /* MOUSE_CLICK */));
+
+  // Check that browser supports drag and drop
+  if ('draggable' in document.createElement('span')) {
+      var dropEvent            = function (vt100) {
+          return function(e) {
+              if (!e) e = window.event;
+              if (e.preventDefault) e.preventDefault();
+              vt100.keysPressed(e.dataTransfer.getData('Text'));
+              return false;
+          };
+      };
+      // Tell the browser that we *can* drop on this target
+      this.addListener(this.scrollable, 'dragover', cancel);
+      this.addListener(this.scrollable, 'dragenter', cancel);
+
+      // Add a listener for the drop event
+      this.addListener(this.scrollable, 'drop', dropEvent(this));
+  }
+
+  // Initialize the blank terminal window.
+  this.currentScreen           = 0;
+  this.cursorX                 = 0;
+  this.cursorY                 = 0;
+  this.numScrollbackLines      = 0;
+  this.top                     = 0;
+  this.bottom                  = 0x7FFFFFFF;
+  this.scale                   = 1.0;
+  this.resizer();
+  this.focusCursor();
+  this.input.focus();
+};
+
+function cancel(event) {
+  if (event.preventDefault) {
+    event.preventDefault();
+  }
+  return false;
+}
+
+VT100.prototype.getChildById = function(parent, id) {
+  var nodeList = parent.all || parent.getElementsByTagName('*');
+  if (typeof nodeList.namedItem == 'undefined') {
+    for (var i = 0; i < nodeList.length; i++) {
+      if (nodeList[i].id == id) {
+        return nodeList[i];
+      }
+    }
+    return null;
+  } else {
+    var elem = (parent.all || parent.getElementsByTagName('*')).namedItem(id);
+    return elem ? elem[0] || elem : null;
+  }
+};
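+
+// Illustrative usage: unlike document.getElementById(), the lookup is scoped
+// to "parent", so multiple terminal instances can coexist on one page:
+//
+//   var cursor = this.getChildById(this.container, 'cursor');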
+
+VT100.prototype.getCurrentComputedStyle = function(elem, style) {
+  if (typeof elem.currentStyle != 'undefined') {
+    return elem.currentStyle[style];
+  } else {
+    return document.defaultView.getComputedStyle(elem, null)[style];
+  }
+};
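+
+// Illustrative usage, mirroring initializeElements() above: the resolved
+// style comes back as a string such as "8px" on a typical unstyled page:
+//
+//   parseInt(this.getCurrentComputedStyle(document.body, 'marginTop'));
+//
+// elem.currentStyle covers old IE; document.defaultView.getComputedStyle()
+// covers standards-compliant browsers.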
+
+VT100.prototype.reconnect = function() {
+  return false;
+};
+
+VT100.prototype.showReconnect = function(state) {
+  if (state) {
+    this.hideSoftKeyboard();
+    this.reconnectBtn.style.visibility = '';
+  } else {
+    this.reconnectBtn.style.visibility = 'hidden';
+  }
+};
+
+VT100.prototype.repairElements = function(console) {
+  for (var line = console.firstChild; line; line = line.nextSibling) {
+    if (!line.clientHeight) {
+      var newLine = document.createElement(line.tagName);
+      newLine.style.cssText       = line.style.cssText;
+      newLine.className           = line.className;
+      if (line.tagName == 'DIV') {
+        for (var span = line.firstChild; span; span = span.nextSibling) {
+          var newSpan             = document.createElement(span.tagName);
+          newSpan.style.cssText   = span.style.cssText;
+          newSpan.className       = span.className;
+          this.setTextContent(newSpan, this.getTextContent(span));
+          newLine.appendChild(newSpan);
+        }
+      } else {
+        this.setTextContent(newLine, this.getTextContent(line));
+      }
+      line.parentNode.replaceChild(newLine, line);
+      line                        = newLine;
+    }
+  }
+};
+
+VT100.prototype.resized = function(w, h) {
+};
+
+VT100.prototype.resizer = function() {
+  // Hide onscreen soft keyboard
+  this.hideSoftKeyboard();
+
+  // The cursor can get corrupted if the print-preview is displayed in Firefox.
+  // Recreating it will repair it.
+  var newCursor                = document.createElement('pre');
+  this.setTextContent(newCursor, ' ');
+  newCursor.id                 = 'cursor';
+  newCursor.style.cssText      = this.cursor.style.cssText;
+  this.cursor.parentNode.insertBefore(newCursor, this.cursor);
+  if (!newCursor.clientHeight) {
+    // Things are broken right now. This is probably because we are
+    // displaying the print-preview. Just don't change any of our settings
+    // until the print dialog is closed again.
+    newCursor.parentNode.removeChild(newCursor);
+    return;
+  } else {
+    // Swap the old broken cursor for the newly created one.
+    this.cursor.parentNode.removeChild(this.cursor);
+    this.cursor                = newCursor;
+  }
+
+  // Really horrible things happen if the contents of the terminal changes
+  // while the print-preview is showing. We get HTML elements that show up
+  // in the DOM, but that do not take up any space. Find these elements and
+  // try to fix them.
+  this.repairElements(this.console[0]);
+  this.repairElements(this.console[1]);
+
+  // Lock the cursor size to the size of a normal character. This helps with
+  // characters that are taller/shorter than normal. Unfortunately, we will
+  // still get confused if somebody enters a character that is wider/narrower
+  // than normal. This can happen if the browser tries to substitute a
+  // character from a different font.
+  this.cursor.style.width      = this.cursorWidth  + 'px';
+  this.cursor.style.height     = this.cursorHeight + 'px';
+
+  // Adjust height for one pixel padding of the #vt100 element.
+  // The latter is necessary to properly display the inactive cursor.
+  var console                  = this.console[this.currentScreen];
+  var height                   = (this.isEmbedded ? this.container.clientHeight
+                                  : (window.innerHeight ||
+                                     document.documentElement.clientHeight ||
+                                     document.body.clientHeight))-1;
+  var partial                  = height % this.cursorHeight;
+  this.scrollable.style.height = (height > 0 ? height : 0) + 'px';
+  this.padding.style.height    = (partial > 0 ? partial : 0) + 'px';
+  var oldTerminalHeight        = this.terminalHeight;
+  this.updateWidth();
+  this.updateHeight();
+
+  // Clip the cursor to the visible screen.
+  var cx                       = this.cursorX;
+  var cy                       = this.cursorY + this.numScrollbackLines;
+
+  // The alternate screen never keeps a scroll back buffer.
+  this.updateNumScrollbackLines();
+  while (this.currentScreen && this.numScrollbackLines > 0) {
+    console.removeChild(console.firstChild);
+    this.numScrollbackLines--;
+  }
+  cy                          -= this.numScrollbackLines;
+  if (cx < 0) {
+    cx                         = 0;
+  } else if (cx > this.terminalWidth) {
+    cx                         = this.terminalWidth - 1;
+    if (cx < 0) {
+      cx                       = 0;
+    }
+  }
+  if (cy < 0) {
+    cy                         = 0;
+  } else if (cy > this.terminalHeight) {
+    cy                         = this.terminalHeight - 1;
+    if (cy < 0) {
+      cy                       = 0;
+    }
+  }
+
+  // Clip the scroll region to the visible screen.
+  if (this.bottom > this.terminalHeight ||
+      this.bottom == oldTerminalHeight) {
+    this.bottom                = this.terminalHeight;
+  }
+  if (this.top >= this.bottom) {
+    this.top                   = this.bottom-1;
+    if (this.top < 0) {
+      this.top                 = 0;
+    }
+  }
+
+  // Truncate lines, if necessary. Explicitly reposition cursor (this is
+  // particularly important after changing the screen number), and reset
+  // the scroll region to the default.
+  this.truncateLines(this.terminalWidth);
+  this.putString(cx, cy, '', undefined);
+  this.scrollable.scrollTop    = this.numScrollbackLines *
+                                 this.cursorHeight + 1;
+
+  // Update classNames for lines in the scrollback buffer
+  var line                     = console.firstChild;
+  for (var i = 0; i < this.numScrollbackLines; i++) {
+    line.className             = 'scrollback';
+    line                       = line.nextSibling;
+  }
+  while (line) {
+    line.className             = '';
+    line                       = line.nextSibling;
+  }
+
+  // Reposition the reconnect button
+  this.reconnectBtn.style.left = (this.terminalWidth*this.cursorWidth/
+                                  this.scale -
+                                  this.reconnectBtn.clientWidth)/2 + 'px';
+  this.reconnectBtn.style.top  = (this.terminalHeight*this.cursorHeight-
+                                  this.reconnectBtn.clientHeight)/2 + 'px';
+
+  // Send notification that the window size has been changed
+  this.resized(this.terminalWidth, this.terminalHeight);
+};
+
+VT100.prototype.showCurrentSize = function() {
+  if (!this.indicateSize) {
+    return;
+  }
+  this.curSizeBox.innerHTML             = '' + this.terminalWidth + 'x' +
+                                               this.terminalHeight;
+  this.curSizeBox.style.left            =
+                                      (this.terminalWidth*this.cursorWidth/
+                                       this.scale -
+                                       this.curSizeBox.clientWidth)/2 + 'px';
+  this.curSizeBox.style.top             =
+                                      (this.terminalHeight*this.cursorHeight -
+                                       this.curSizeBox.clientHeight)/2 + 'px';
+  this.curSizeBox.style.visibility      = '';
+  if (this.curSizeTimeout) {
+    clearTimeout(this.curSizeTimeout);
+  }
+
+  // Only show the terminal size for a short amount of time after resizing.
+  // Then hide this information again. Some browsers generate resize events
+  // throughout the entire resize operation. This is nice, and we will show
+  // the terminal size while the user is dragging the window borders.
+  // Other browsers only generate a single event when the user releases the
+  // mouse. In those cases, we can only show the terminal size once at the
+  // end of the resize operation.
+  this.curSizeTimeout                   = setTimeout(function(vt100) {
+    return function() {
+      vt100.curSizeTimeout              = null;
+      vt100.curSizeBox.style.visibility = 'hidden';
+    };
+  }(this), 1000);
+};
+
+VT100.prototype.selection = function() {
+  try {
+    return '' + (window.getSelection && window.getSelection() ||
+                 document.selection && document.selection.type == 'Text' &&
+                 document.selection.createRange().text || '');
+  } catch (e) {
+  }
+  return '';
+};
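+
+// Note: window.getSelection() covers standards browsers, while old IE exposes
+// document.selection; the '' + prefix coerces a Selection object into its
+// plain-text contents.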
+
+VT100.prototype.cancelEvent = function(event) {
+  try {
+    // For non-IE browsers
+    event.stopPropagation();
+    event.preventDefault();
+  } catch (e) {
+  }
+  try {
+    // For IE
+    event.cancelBubble = true;
+    event.returnValue  = false;
+    event.button       = 0;
+    event.keyCode      = 0;
+  } catch (e) {
+  }
+  return false;
+};
+
+VT100.prototype.mousePosition = function(event) {
+  var offsetX      = this.container.offsetLeft;
+  var offsetY      = this.container.offsetTop;
+  for (var e = this.container; e = e.offsetParent; ) {
+    offsetX       += e.offsetLeft;
+    offsetY       += e.offsetTop;
+  }
+  return [ event.clientX - offsetX,
+           event.clientY - offsetY ];
+};
+
+VT100.prototype.mouseEvent = function(event, type) {
+  // If any text is currently selected, do not move the focus as that would
+  // invalidate the selection.
+  var selection    = this.selection();
+  if ((type == 1 /* MOUSE_UP */ || type == 2 /* MOUSE_CLICK */) && !selection.length) {
+    this.input.focus();
+  }
+
+  // Compute mouse position in characters.
+  var position     = this.mousePosition(event);
+  var x            = Math.floor(position[0] / this.cursorWidth);
+  var y            = Math.floor((position[1] + this.scrollable.scrollTop) /
+                                this.cursorHeight) - this.numScrollbackLines;
+  var inside       = true;
+  if (x >= this.terminalWidth) {
+    x              = this.terminalWidth - 1;
+    inside         = false;
+  }
+  if (x < 0) {
+    x              = 0;
+    inside         = false;
+  }
+  if (y >= this.terminalHeight) {
+    y              = this.terminalHeight - 1;
+    inside         = false;
+  }
+  if (y < 0) {
+    y              = 0;
+    inside         = false;
+  }
+
+  // Compute button number and modifier keys.
+  var button       = type != 0 /* MOUSE_DOWN */ ? 3 :
+                     typeof event.pageX != 'undefined' ? event.button :
+                     [ undefined, 0, 2, 0, 1, 0, 1, 0  ][event.button];
+  if (button != undefined) {
+    if (event.shiftKey) {
+      button      |= 0x04;
+    }
+    if (event.altKey || event.metaKey) {
+      button      |= 0x08;
+    }
+    if (event.ctrlKey) {
+      button      |= 0x10;
+    }
+  }
+
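+  // Illustrative example of the xterm-style report built below: a left-button
+  // press (button == 0) in the top-left cell (x == 0, y == 0) is encoded as
+  // ESC [ M followed by chr(0+32) = ' ', chr(0+33) = '!', chr(0+33) = '!'.
+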
+  // Report mouse events if they happen inside of the current screen and
+  // with the SHIFT key unpressed. Neither restriction applies to button
+  // releases, as we always want to report those.
+  if (this.mouseReporting && !selection.length &&
+      (type != 0 /* MOUSE_DOWN */ || !event.shiftKey)) {
+    if (inside || type != 0 /* MOUSE_DOWN */) {
+      if (button != undefined) {
+        var report = '\u001B[M' + String.fromCharCode(button + 32) +
+                                  String.fromCharCode(x      + 33) +
+                                  String.fromCharCode(y      + 33);
+        if (type != 2 /* MOUSE_CLICK */) {
+          this.keysPressed(report);
+        }
+
+        // If we reported the event, stop propagating it (not sure if this
+        // actually works on most browsers; blocking the global "oncontextmenu"
+        // event is still necessary).
+        return this.cancelEvent(event);
+      }
+    }
+  }
+
+  // Bring up context menu.
+  if (button == 2 && !event.shiftKey) {
+    if (type == 0 /* MOUSE_DOWN */) {
+      this.showContextMenu(position[0], position[1]);
+    }
+    return this.cancelEvent(event);
+  }
+
+  if (this.mouseReporting) {
+    try {
+      event.shiftKey         = false;
+    } catch (e) {
+    }
+  }
+
+  return true;
+};
+
+VT100.prototype.replaceChar = function(s, ch, repl) {
+  for (var i = -1;;) {
+    i = s.indexOf(ch, i + 1);
+    if (i < 0) {
+      break;
+    }
+    s = s.substr(0, i) + repl + s.substr(i + 1);
+  }
+  return s;
+};
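+
+// Illustrative usage: replaces every occurrence of a single character, e.g.
+//
+//   this.replaceChar('a b c', ' ', '\u00A0')   // -> 'a\u00A0b\u00A0c'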
+
+VT100.prototype.htmlEscape = function(s) {
+  return this.replaceChar(this.replaceChar(this.replaceChar(this.replaceChar(
+                s, '&', '&amp;'), '<', '&lt;'), '"', '&quot;'), ' ', '\u00A0');
+};
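+
+// Illustrative usage ('&' is escaped first, so the entities introduced by the
+// later replacements are never re-escaped):
+//
+//   this.htmlEscape('a < b & "c"')
+//   // -> 'a\u00A0&lt;\u00A0b\u00A0&amp;\u00A0&quot;c&quot;'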
+
+VT100.prototype.getTextContent = function(elem) {
+  return elem.textContent ||
+         (typeof elem.textContent == 'undefined' ? elem.innerText : '');
+};
+
+VT100.prototype.setTextContentRaw = function(elem, s) {
+  // Updating the content of an element is an expensive operation. It actually
+  // pays off to first check whether the element is still unchanged.
+  if (typeof elem.textContent == 'undefined') {
+    if (elem.innerText != s) {
+      try {
+        elem.innerText = s;
+      } catch (e) {
+        // Very old versions of IE do not allow setting innerText. Instead,
+        // remove all children, by setting innerHTML and then set the text
+        // using DOM methods.
+        elem.innerHTML = '';
+        elem.appendChild(document.createTextNode(
+                                          this.replaceChar(s, ' ', '\u00A0')));
+      }
+    }
+  } else {
+    if (elem.textContent != s) {
+      elem.textContent = s;
+    }
+  }
+};
+
+VT100.prototype.setTextContent = function(elem, s) {
+  // Check if we find any URLs in the text. If so, automatically convert them
+  // to links.
+  if (this.urlRE && this.urlRE.test(s)) {
+    var inner          = '';
+    for (;;) {
+      var consumed = 0;
+      if (RegExp.leftContext != null) {
+        inner         += this.htmlEscape(RegExp.leftContext);
+        consumed      += RegExp.leftContext.length;
+      }
+      var url          = this.htmlEscape(RegExp.lastMatch);
+      var fullUrl      = url;
+
+      // If no protocol was specified, try to guess a reasonable one.
+      if (url.indexOf('http://') < 0 && url.indexOf('https://') < 0 &&
+          url.indexOf('ftp://')  < 0 && url.indexOf('mailto:')  < 0) {
+        var slash      = url.indexOf('/');
+        var at         = url.indexOf('@');
+        var question   = url.indexOf('?');
+        if (at > 0 &&
+            (at < question || question < 0) &&
+            (slash < 0 || (question > 0 && slash > question))) {
+          fullUrl      = 'mailto:' + url;
+        } else {
+          fullUrl      = (url.indexOf('ftp.') == 0 ? 'ftp://' : 'http://') +
+                          url;
+        }
+      }
+
+      inner           += '<a target="vt100Link" href="' + fullUrl +
+                         '">' + url + '</a>';
+      consumed        += RegExp.lastMatch.length;
+      s                = s.substr(consumed);
+      if (!this.urlRE.test(s)) {
+        if (RegExp.rightContext != null) {
+          inner       += this.htmlEscape(RegExp.rightContext);
+        }
+        break;
+      }
+    }
+    elem.innerHTML     = inner;
+    return;
+  }
+
+  this.setTextContentRaw(elem, s);
+};
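+
+// Note: the linkification loop above relies on the non-standard (but widely
+// implemented) RegExp.leftContext / RegExp.lastMatch / RegExp.rightContext
+// statics, which are refreshed by every this.urlRE.test(s) call. Assuming
+// this.urlRE matches bare host names, a line like 'see www.example.com here'
+// would render with the middle token wrapped in
+// <a target="vt100Link" href="http://www.example.com">.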
+
+VT100.prototype.insertBlankLine = function(y, color, style) {
+  // Insert a blank line at position y. This method ignores the scrollback
+  // buffer. The caller has to add the length of the scrollback buffer to
+  // the position, if necessary.
+  // If the position is larger than the number of current lines, this
+  // method just adds a new line right after the last existing one. It does
+  // not add any missing lines in between. It is the caller's responsibility
+  // to do so.
+  if (!color) {
+    color                = 'ansi0 bgAnsi15';
+  }
+  if (!style) {
+    style                = '';
+  }
+  var line;
+  if (color == 'ansi0 bgAnsi15' && !style) {
+    line                 = document.createElement('pre');
+    this.setTextContent(line, '\n');
+  } else {
+    line                 = document.createElement('div');
+    var span             = document.createElement('span');
+    span.style.cssText   = style;
+    span.className       = color;
+    this.setTextContent(span, this.spaces(this.terminalWidth));
+    line.appendChild(span);
+  }
+  line.style.height      = this.cursorHeight + 'px';
+  var console            = this.console[this.currentScreen];
+  if (console.childNodes.length > y) {
+    console.insertBefore(line, console.childNodes[y]);
+  } else {
+    console.appendChild(line);
+  }
+};
+
+VT100.prototype.updateWidth = function() {
+  this.terminalWidth = Math.floor(this.console[this.currentScreen].offsetWidth/
+                                  this.cursorWidth*this.scale);
+  return this.terminalWidth;
+};
+
+VT100.prototype.updateHeight = function() {
+  // We want to be able to display either a terminal window that fills the
+  // entire browser window, or a terminal window that is contained in a
+  // <div> which is embedded somewhere in the web page.
+  if (this.isEmbedded) {
+    // Embedded terminal. Use size of the containing <div> (id="vt100").
+    this.terminalHeight = Math.floor((this.container.clientHeight-1) /
+                                     this.cursorHeight);
+  } else {
+    // Use the full browser window.
+    this.terminalHeight = Math.floor(((window.innerHeight ||
+                                       document.documentElement.clientHeight ||
+                                       document.body.clientHeight)-1)/
+                                     this.cursorHeight);
+  }
+  return this.terminalHeight;
+};
+
+VT100.prototype.updateNumScrollbackLines = function() {
+  var scrollback          = Math.floor(
+                                this.console[this.currentScreen].offsetHeight /
+                                this.cursorHeight) -
+                            this.terminalHeight;
+  this.numScrollbackLines = scrollback < 0 ? 0 : scrollback;
+  return this.numScrollbackLines;
+};
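+
+// Illustrative check: if the current console renders 100 lines and the
+// terminal is 24 rows high, numScrollbackLines becomes 100 - 24 = 76.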
+
+VT100.prototype.truncateLines = function(width) {
+  if (width < 0) {
+    width             = 0;
+  }
+  for (var line = this.console[this.currentScreen].firstChild; line;
+       line = line.nextSibling) {
+    if (line.tagName == 'DIV') {
+      var x           = 0;
+
+      // Traverse the current line and truncate it once we have seen
+      // "width" characters.
+      for (var span = line.firstChild; span;
+           span = span.nextSibling) {
+        var s         = this.getTextContent(span);
+        var l         = s.length;
+        if (x + l > width) {
+          this.setTextContent(span, s.substr(0, width - x));
+          while (span.nextSibling) {
+            line.removeChild(line.lastChild);
+          }
+          break;
+        }
+        x            += l;
+      }
+      // Prune white space from the end of the current line
+      var span       = line.lastChild;
+      while (span &&
+             span.className == 'ansi0 bgAnsi15' &&
+             !span.style.cssText.length) {
+        // Scan backwards looking for first non-space character
+        var s         = this.getTextContent(span);
+        for (var i = s.length; i--; ) {
+          if (s.charAt(i) != ' ' && s.charAt(i) != '\u00A0') {
+            if (i+1 != s.length) {
+              this.setTextContent(span, s.substr(0, i+1));
+            }
+            span      = null;
+            break;
+          }
+        }
+        if (span) {
+          var sibling = span;
+          span        = span.previousSibling;
+          if (span) {
+            // Remove blank <span>'s from end of line
+            line.removeChild(sibling);
+          } else {
+            // Remove entire line (i.e. <div>), if empty
+            var blank = document.createElement('pre');
+            blank.style.height = this.cursorHeight + 'px';
+            this.setTextContent(blank, '\n');
+            line.parentNode.replaceChild(blank, line);
+          }
+        }
+      }
+    }
+  }
+};
+
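+// putString() is the core rendering primitive: it writes "text" into the
+// character cell at (x, y) with the given color classes and CSS style,
+// splitting and merging <span> runs so that each line remains a minimal
+// sequence of uniformly styled segments, and finally repositions the cursor.
+// Calling it with an empty string (as resizer() and gotoXY() do) merely
+// moves the cursor.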
+VT100.prototype.putString = function(x, y, text, color, style) {
+  if (!color) {
+    color                           = 'ansi0 bgAnsi15';
+  }
+  if (!style) {
+    style                           = '';
+  }
+  var yIdx                          = y + this.numScrollbackLines;
+  var line;
+  var sibling;
+  var s;
+  var span;
+  var xPos                          = 0;
+  var console                       = this.console[this.currentScreen];
+  if (!text.length && (yIdx >= console.childNodes.length ||
+                       console.childNodes[yIdx].tagName != 'DIV')) {
+    // Positioning cursor to a blank location
+    span                            = null;
+  } else {
+    // Create missing blank lines at end of page
+    while (console.childNodes.length <= yIdx) {
+      // In order to simplify lookups, we want to make sure that each line
+      // is represented by exactly one element (and possibly a whole bunch of
+      // children).
+      // For non-blank lines, we can create a <div> containing one or more
+      // <span>s. For blank lines, this fails as browsers tend to optimize them
+      // away. But fortunately, a <pre> tag containing a newline character
+      // appears to work for all browsers (a &nbsp; would also work, but then
+      // copying from the browser window would insert superfluous spaces into
+      // the clipboard).
+      this.insertBlankLine(yIdx);
+    }
+    line                            = console.childNodes[yIdx];
+
+    // If necessary, promote blank '\n' line to a <div> tag
+    if (line.tagName != 'DIV') {
+      var div                       = document.createElement('div');
+      div.style.height              = this.cursorHeight + 'px';
+      div.innerHTML                 = '<span></span>';
+      console.replaceChild(div, line);
+      line                          = div;
+    }
+
+    // Scan through list of <span>'s until we find the one where our text
+    // starts
+    span                            = line.firstChild;
+    var len;
+    while (span.nextSibling && xPos < x) {
+      len                           = this.getTextContent(span).length;
+      if (xPos + len > x) {
+        break;
+      }
+      xPos                         += len;
+      span                          = span.nextSibling;
+    }
+
+    if (text.length) {
+      // If current <span> is not long enough, pad with spaces or add new
+      // span
+      s                             = this.getTextContent(span);
+      var oldColor                  = span.className;
+      var oldStyle                  = span.style.cssText;
+      if (xPos + s.length < x) {
+        if (oldColor != 'ansi0 bgAnsi15' || oldStyle != '') {
+          span                      = document.createElement('span');
+          line.appendChild(span);
+          span.className            = 'ansi0 bgAnsi15';
+          span.style.cssText        = '';
+          oldColor                  = 'ansi0 bgAnsi15';
+          oldStyle                  = '';
+          xPos                     += s.length;
+          s                         = '';
+        }
+        do {
+          s                        += ' ';
+        } while (xPos + s.length < x);
+      }
+
+      // If styles do not match, create a new <span>
+      var del                       = text.length - s.length + x - xPos;
+      if (oldColor != color ||
+          (oldStyle != style && (oldStyle || style))) {
+        if (xPos == x) {
+          // Replacing text at beginning of existing <span>
+          if (text.length >= s.length) {
+            // New text is equal or longer than existing text
+            s                       = text;
+          } else {
+            // Insert new <span> before the current one, then remove leading
+            // part of existing <span>, adjust style of new <span>, and finally
+            // set its contents
+            sibling                 = document.createElement('span');
+            line.insertBefore(sibling, span);
+            this.setTextContent(span, s.substr(text.length));
+            span                    = sibling;
+            s                       = text;
+          }
+        } else {
+          // Replacing text some way into the existing <span>
+          var remainder             = s.substr(x + text.length - xPos);
+          this.setTextContent(span, s.substr(0, x - xPos));
+          xPos                      = x;
+          sibling                   = document.createElement('span');
+          if (span.nextSibling) {
+            line.insertBefore(sibling, span.nextSibling);
+            span                    = sibling;
+            if (remainder.length) {
+              sibling               = document.createElement('span');
+              sibling.className     = oldColor;
+              sibling.style.cssText = oldStyle;
+              this.setTextContent(sibling, remainder);
+              line.insertBefore(sibling, span.nextSibling);
+            }
+          } else {
+            line.appendChild(sibling);
+            span                    = sibling;
+            if (remainder.length) {
+              sibling               = document.createElement('span');
+              sibling.className     = oldColor;
+              sibling.style.cssText = oldStyle;
+              this.setTextContent(sibling, remainder);
+              line.appendChild(sibling);
+            }
+          }
+          s                         = text;
+        }
+        span.className              = color;
+        span.style.cssText          = style;
+      } else {
+        // Overwrite (partial) <span> with new text
+        s                           = s.substr(0, x - xPos) +
+          text +
+          s.substr(x + text.length - xPos);
+      }
+      this.setTextContent(span, s);
+
+      // Delete all subsequent <span>'s that have just been overwritten
+      sibling                       = span.nextSibling;
+      while (del > 0 && sibling) {
+        s                           = this.getTextContent(sibling);
+        len                         = s.length;
+        if (len <= del) {
+          line.removeChild(sibling);
+          del                      -= len;
+          sibling                   = span.nextSibling;
+        } else {
+          this.setTextContent(sibling, s.substr(del));
+          break;
+        }
+      }
+
+      // Merge <span> with next sibling, if styles are identical
+      if (sibling && span.className == sibling.className &&
+          span.style.cssText == sibling.style.cssText) {
+        this.setTextContent(span,
+                            this.getTextContent(span) +
+                            this.getTextContent(sibling));
+        line.removeChild(sibling);
+      }
+    }
+  }
+
+  // Position cursor
+  this.cursorX                      = x + text.length;
+  if (this.cursorX >= this.terminalWidth) {
+    this.cursorX                    = this.terminalWidth - 1;
+    if (this.cursorX < 0) {
+      this.cursorX                  = 0;
+    }
+  }
+  var pixelX                        = -1;
+  var pixelY                        = -1;
+  if (!this.cursor.style.visibility) {
+    var idx                         = this.cursorX - xPos;
+    if (span) {
+      // If we are in a non-empty line, take the cursor Y position from the
+      // other elements in this line. If dealing with broken, non-proportional
+      // fonts, this is likely to yield better results.
+      pixelY                        = span.offsetTop +
+                                      span.offsetParent.offsetTop;
+      s                             = this.getTextContent(span);
+      var nxtIdx                    = idx - s.length;
+      if (nxtIdx < 0) {
+        this.setTextContent(this.cursor, s.charAt(idx));
+        pixelX                      = span.offsetLeft +
+                                      idx*span.offsetWidth / s.length;
+      } else {
+        if (nxtIdx == 0) {
+          pixelX                    = span.offsetLeft + span.offsetWidth;
+        }
+        if (span.nextSibling) {
+          s                         = this.getTextContent(span.nextSibling);
+          this.setTextContent(this.cursor, s.charAt(nxtIdx));
+          if (pixelX < 0) {
+            pixelX                  = span.nextSibling.offsetLeft +
+                                      nxtIdx*span.offsetWidth / s.length;
+          }
+        } else {
+          this.setTextContent(this.cursor, ' ');
+        }
+      }
+    } else {
+      this.setTextContent(this.cursor, ' ');
+    }
+  }
+  if (pixelX >= 0) {
+    this.cursor.style.left          = (pixelX + (this.isIE ? 1 : 0))/
+                                      this.scale + 'px';
+  } else {
+    this.setTextContent(this.space, this.spaces(this.cursorX));
+    this.cursor.style.left          = (this.space.offsetWidth +
+                                       console.offsetLeft)/this.scale + 'px';
+  }
+  this.cursorY                      = yIdx - this.numScrollbackLines;
+  if (pixelY >= 0) {
+    this.cursor.style.top           = pixelY + 'px';
+  } else {
+    this.cursor.style.top           = yIdx*this.cursorHeight +
+                                      console.offsetTop + 'px';
+  }
+
+  if (text.length) {
+    // Merge <span> with previous sibling, if styles are identical
+    if ((sibling = span.previousSibling) &&
+        span.className == sibling.className &&
+        span.style.cssText == sibling.style.cssText) {
+      this.setTextContent(span,
+                          this.getTextContent(sibling) +
+                          this.getTextContent(span));
+      line.removeChild(sibling);
+    }
+
+    // Prune white space from the end of the current line
+    span                            = line.lastChild;
+    while (span &&
+           span.className == 'ansi0 bgAnsi15' &&
+           !span.style.cssText.length) {
+      // Scan backwards looking for first non-space character
+      s                             = this.getTextContent(span);
+      for (var i = s.length; i--; ) {
+        if (s.charAt(i) != ' ' && s.charAt(i) != '\u00A0') {
+          if (i+1 != s.length) {
+            this.setTextContent(span, s.substr(0, i+1));
+          }
+          span                      = null;
+          break;
+        }
+      }
+      if (span) {
+        sibling                     = span;
+        span                        = span.previousSibling;
+        if (span) {
+          // Remove blank <span>'s from end of line
+          line.removeChild(sibling);
+        } else {
+          // Remove entire line (i.e. <div>), if empty
+          var blank                 = document.createElement('pre');
+          blank.style.height        = this.cursorHeight + 'px';
+          this.setTextContent(blank, '\n');
+          line.parentNode.replaceChild(blank, line);
+        }
+      }
+    }
+  }
+};
+
+VT100.prototype.gotoXY = function(x, y) {
+  if (x >= this.terminalWidth) {
+    x           = this.terminalWidth - 1;
+  }
+  if (x < 0) {
+    x           = 0;
+  }
+  var minY, maxY;
+  if (this.offsetMode) {
+    minY        = this.top;
+    maxY        = this.bottom;
+  } else {
+    minY        = 0;
+    maxY        = this.terminalHeight;
+  }
+  if (y >= maxY) {
+    y           = maxY - 1;
+  }
+  if (y < minY) {
+    y           = minY;
+  }
+  this.putString(x, y, '', undefined);
+  this.needWrap = false;
+};
+
+VT100.prototype.gotoXaY = function(x, y) {
+  this.gotoXY(x, this.offsetMode ? (this.top + y) : y);
+};
+
+VT100.prototype.refreshInvertedState = function() {
+  if (this.isInverted) {
+    this.scrollable.className += ' inverted';
+  } else {
+    this.scrollable.className = this.scrollable.className.
+                                                     replace(/ *inverted/, '');
+  }
+};
+
+VT100.prototype.enableAlternateScreen = function(state) {
+  // Don't do anything if we are already on the desired screen.
+  if ((state ? 1 : 0) == this.currentScreen) {
+    // Calling the resizer is not actually necessary. But it is a good way
+    // of resetting state that might have gotten corrupted.
+    this.resizer();
+    return;
+  }
+
+  // We save the full state of the normal screen, when we switch away from it.
+  // But for the alternate screen, no saving is necessary. We always reset
+  // it when we switch to it.
+  if (state) {
+    this.saveCursor();
+  }
+
+  // Display new screen, and initialize state (the resizer does that for us).
+  this.currentScreen                                 = state ? 1 : 0;
+  this.console[1-this.currentScreen].style.display   = 'none';
+  this.console[this.currentScreen].style.display     = '';
+
+  // Select appropriate character pitch.
+  var transform                                      = this.getTransformName();
+  if (transform) {
+    if (state) {
+      // Upon enabling the alternate screen, we switch to 80 column mode. But
+      // upon returning to the regular screen, we restore the mode that was
+      // in effect previously.
+      this.console[1].style[transform]               = '';
+    }
+    var style                                        =
+                             this.console[this.currentScreen].style[transform];
+    this.cursor.style[transform]                     = style;
+    this.space.style[transform]                      = style;
+    this.scale                                       = style == '' ? 1.0:1.65;
+    if (transform == 'filter') {
+       this.console[this.currentScreen].style.width  = style == '' ? '165%':'';
+    }
+  }
+  this.resizer();
+
+  // If we switched to the alternate screen, reset it completely. Otherwise,
+  // restore the saved state.
+  if (state) {
+    this.gotoXY(0, 0);
+    this.clearRegion(0, 0, this.terminalWidth, this.terminalHeight);
+  } else {
+    this.restoreCursor();
+  }
+};
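+
+// Context: full-screen programs (vi, less, and the like) switch to the
+// alternate screen so that the primary screen and its scrollback come back
+// intact on exit; that is why the alternate screen is reset rather than
+// saved and restored.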
+
+VT100.prototype.hideCursor = function() {
+  var hidden = this.cursor.style.visibility == 'hidden';
+  if (!hidden) {
+    this.cursor.style.visibility = 'hidden';
+    return true;
+  }
+  return false;
+};
+
+VT100.prototype.showCursor = function(x, y) {
+  if (this.cursor.style.visibility) {
+    this.cursor.style.visibility = '';
+    this.putString(x == undefined ? this.cursorX : x,
+                   y == undefined ? this.cursorY : y,
+                   '', undefined);
+    return true;
+  }
+  return false;
+};
+
+VT100.prototype.scrollBack = function() {
+  var i                     = this.scrollable.scrollTop -
+                              this.scrollable.clientHeight;
+  this.scrollable.scrollTop = i < 0 ? 0 : i;
+};
+
+VT100.prototype.scrollFore = function() {
+  var i                     = this.scrollable.scrollTop +
+                              this.scrollable.clientHeight;
+  this.scrollable.scrollTop = i > this.numScrollbackLines *
+                                  this.cursorHeight + 1
+                              ? this.numScrollbackLines *
+                                this.cursorHeight + 1
+                              : i;
+};
+
+VT100.prototype.spaces = function(i) {
+  var s = '';
+  while (i-- > 0) {
+    s += ' ';
+  }
+  return s;
+};
+
+VT100.prototype.clearRegion = function(x, y, w, h, color, style) {
+  w         += x;
+  if (x < 0) {
+    x        = 0;
+  }
+  if (w > this.terminalWidth) {
+    w        = this.terminalWidth;
+  }
+  if ((w    -= x) <= 0) {
+    return;
+  }
+  h         += y;
+  if (y < 0) {
+    y        = 0;
+  }
+  if (h > this.terminalHeight) {
+    h        = this.terminalHeight;
+  }
+  if ((h    -= y) <= 0) {
+    return;
+  }
+
+  // Special case the situation where we clear the entire screen, and we do
+  // not have a scrollback buffer. In that case, we should just remove all
+  // child nodes.
+  if (!this.numScrollbackLines &&
+      w == this.terminalWidth && h == this.terminalHeight &&
+      (color == undefined || color == 'ansi0 bgAnsi15') && !style) {
+    var console = this.console[this.currentScreen];
+    while (console.lastChild) {
+      console.removeChild(console.lastChild);
+    }
+    this.putString(this.cursorX, this.cursorY, '', undefined);
+  } else {
+    var hidden = this.hideCursor();
+    var cx     = this.cursorX;
+    var cy     = this.cursorY;
+    var s      = this.spaces(w);
+    for (var i = y+h; i-- > y; ) {
+      this.putString(x, i, s, color, style);
+    }
+    hidden ? this.showCursor(cx, cy) : this.putString(cx, cy, '', undefined);
+  }
+};
+
+VT100.prototype.copyLineSegment = function(dX, dY, sX, sY, w) {
+  var text                            = [ ];
+  var className                       = [ ];
+  var style                           = [ ];
+  var console                         = this.console[this.currentScreen];
+  if (sY >= console.childNodes.length) {
+    text[0]                           = this.spaces(w);
+    className[0]                      = undefined;
+    style[0]                          = undefined;
+  } else {
+    var line = console.childNodes[sY];
+    if (line.tagName != 'DIV' || !line.childNodes.length) {
+      text[0]                         = this.spaces(w);
+      className[0]                    = undefined;
+      style[0]                        = undefined;
+    } else {
+      var x                           = 0;
+      for (var span = line.firstChild; span && w > 0; span = span.nextSibling){
+        var s                         = this.getTextContent(span);
+        var len                       = s.length;
+        if (x + len > sX) {
+          var o                       = sX > x ? sX - x : 0;
+          text[text.length]           = s.substr(o, w);
+          className[className.length] = span.className;
+          style[style.length]         = span.style.cssText;
+          w                          -= len - o;
+        }
+        x                            += len;
+      }
+      if (w > 0) {
+        text[text.length]             = this.spaces(w);
+        className[className.length]   = undefined;
+        style[style.length]           = undefined;
+      }
+    }
+  }
+  var hidden                          = this.hideCursor();
+  var cx                              = this.cursorX;
+  var cy                              = this.cursorY;
+  for (var i = 0; i < text.length; i++) {
+    var color;
+    if (className[i]) {
+      color                           = className[i];
+    } else {
+      color                           = 'ansi0 bgAnsi15';
+    }
+    this.putString(dX, dY - this.numScrollbackLines, text[i], color, style[i]);
+    dX                               += text[i].length;
+  }
+  hidden ? this.showCursor(cx, cy) : this.putString(cx, cy, '', undefined);
+};
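+
+// copyLineSegment() is scrollRegion()'s helper for partial-line scrolls: it
+// snapshots w cells of text together with their class and style, starting at
+// (sX, sY), and replays them at (dX, dY) through putString().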
+
+VT100.prototype.scrollRegion = function(x, y, w, h, incX, incY,
+                                        color, style) {
+  var left             = incX < 0 ? -incX : 0;
+  var right            = incX > 0 ?  incX : 0;
+  var up               = incY < 0 ? -incY : 0;
+  var down             = incY > 0 ?  incY : 0;
+
+  // Clip region against terminal size
+  var dontScroll       = null;
+  w                   += x;
+  if (x < left) {
+    x                  = left;
+  }
+  if (w > this.terminalWidth - right) {
+    w                  = this.terminalWidth - right;
+  }
+  if ((w              -= x) <= 0) {
+    dontScroll         = 1;
+  }
+  h                   += y;
+  if (y < up) {
+    y                  = up;
+  }
+  if (h > this.terminalHeight - down) {
+    h                  = this.terminalHeight - down;
+  }
+  if ((h              -= y) < 0) {
+    dontScroll         = 1;
+  }
+  if (!dontScroll) {
+    if (style && style.indexOf('underline') >= 0) {
+      // Different terminal emulators disagree on the attributes that
+      // are used for scrolling. The consensus seems to be never to
+      // fill with underlined spaces. N.B. this is different from the
+      // cases when the user blanks a region. User-initiated blanking
+      // always fills with all of the current attributes.
+      style            = style.replace(/text-decoration:underline;/, '');
+    }
+
+    // Compute current scroll position
+    var scrollPos      = this.numScrollbackLines -
+                         (this.scrollable.scrollTop-1) / this.cursorHeight;
+
+    // Determine original cursor position. Hide cursor temporarily to avoid
+    // visual artifacts.
+    var hidden         = this.hideCursor();
+    var cx             = this.cursorX;
+    var cy             = this.cursorY;
+    var console        = this.console[this.currentScreen];
+
+    if (!incX && !x && w == this.terminalWidth) {
+      // Scrolling entire lines
+      if (incY < 0) {
+        // Scrolling up
+        if (!this.currentScreen && y == -incY &&
+            h == this.terminalHeight + incY) {
+          // Scrolling up while adding to the scrollback buffer. This is only
+          // possible if there are at least as many lines in the console as
+          // the terminal is high.
+          while (console.childNodes.length < this.terminalHeight) {
+            this.insertBlankLine(this.terminalHeight);
+          }
+
+          // Add new lines at bottom in order to force scrolling
+          for (var i = 0; i < y; i++) {
+            this.insertBlankLine(console.childNodes.length, color, style);
+          }
+
+          // Adjust the number of lines in the scrollback buffer by
+          // removing excess entries.
+          this.updateNumScrollbackLines();
+          while (this.numScrollbackLines >
+                 (this.currentScreen ? 0 : this.maxScrollbackLines)) {
+            console.removeChild(console.firstChild);
+            this.numScrollbackLines--;
+          }
+
+          // Mark lines in the scrollback buffer, so that they do not get
+          // printed.
+          for (var i = this.numScrollbackLines, j = -incY;
+               i-- > 0 && j-- > 0; ) {
+            console.childNodes[i].className = 'scrollback';
+          }
+        } else {
+          // Scrolling up without adding to the scrollback buffer.
+          for (var i = -incY;
+               i-- > 0 &&
+               console.childNodes.length >
+               this.numScrollbackLines + y + incY; ) {
+            console.removeChild(console.childNodes[
+                                          this.numScrollbackLines + y + incY]);
+          }
+
+          // If we used to have a scrollback buffer, then we must make sure
+          // that we add back blank lines at the bottom of the terminal.
+          // Similarly, if we are scrolling in the middle of the screen,
+          // we must add blank lines to ensure that the bottom of the screen
+          // does not move up.
+          if (this.numScrollbackLines > 0 ||
+              console.childNodes.length > this.numScrollbackLines+y+h+incY) {
+            for (var i = -incY; i-- > 0; ) {
+              this.insertBlankLine(this.numScrollbackLines + y + h + incY,
+                                   color, style);
+            }
+          }
+        }
+      } else {
+        // Scrolling down
+        for (var i = incY;
+             i-- > 0 &&
+             console.childNodes.length > this.numScrollbackLines + y + h; ) {
+          console.removeChild(console.childNodes[this.numScrollbackLines+y+h]);
+        }
+        for (var i = incY; i--; ) {
+          this.insertBlankLine(this.numScrollbackLines + y, color, style);
+        }
+      }
+    } else {
+      // Scrolling partial lines
+      if (incY <= 0) {
+        // Scrolling up or horizontally within a line
+        for (var i = y + this.numScrollbackLines;
+             i < y + this.numScrollbackLines + h;
+             i++) {
+          this.copyLineSegment(x + incX, i + incY, x, i, w);
+        }
+      } else {
+        // Scrolling down
+        for (var i = y + this.numScrollbackLines + h;
+             i-- > y + this.numScrollbackLines; ) {
+          this.copyLineSegment(x + incX, i + incY, x, i, w);
+        }
+      }
+
+      // Clear blank regions
+      if (incX > 0) {
+        this.clearRegion(x, y, incX, h, color, style);
+      } else if (incX < 0) {
+        this.clearRegion(x + w + incX, y, -incX, h, color, style);
+      }
+      if (incY > 0) {
+        this.clearRegion(x, y, w, incY, color, style);
+      } else if (incY < 0) {
+        this.clearRegion(x, y + h + incY, w, -incY, color, style);
+      }
+    }
+
+    // Reset scroll position
+    this.scrollable.scrollTop = (this.numScrollbackLines-scrollPos) *
+                                this.cursorHeight + 1;
+
+    // Move cursor back to its original position
+    hidden ? this.showCursor(cx, cy) : this.putString(cx, cy, '', undefined);
+  }
+};
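+
+// Illustration (a hedged sketch, not part of the emulator itself; "term"
+// stands for any VT100 instance): the two calls below mirror how lf() and
+// csiL() later in this file drive scrollRegion. Scrolling the region up one
+// line passes incY = -1; opening a blank line at the cursor scrolls the rows
+// below it down with incY = +1.
+function exampleScrollCalls(term) {
+  // Scroll up by one line, as lf() does when the cursor is on the bottom row.
+  term.scrollRegion(0, term.top + 1,
+                    term.terminalWidth, term.bottom - term.top - 1,
+                    0, -1, term.color, term.style);
+  // Scroll down by one line, as csiL() does to open a blank line at the cursor.
+  term.scrollRegion(0, term.cursorY,
+                    term.terminalWidth, term.bottom - term.cursorY - 1,
+                    0, 1, term.color, term.style);
+}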
+
+VT100.prototype.copy = function(selection) {
+  if (selection == undefined) {
+    selection                = this.selection();
+  }
+  this.internalClipboard     = undefined;
+  if (selection.length) {
+    try {
+      // IE
+      this.cliphelper.value  = selection;
+      this.cliphelper.select();
+      this.cliphelper.createTextRange().execCommand('copy');
+    } catch (e) {
+      this.internalClipboard = selection;
+    }
+    this.cliphelper.value    = '';
+  }
+};
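+
+// Illustration (a sketch under the assumption of a browser that exposes the
+// asynchronous Clipboard API, which this code predates): the same
+// try-the-native-API, fall-back-to-internal-state pattern as copy() above.
+function exampleCopyWithFallback(vt100, selection) {
+  if (navigator.clipboard && navigator.clipboard.writeText) {
+    navigator.clipboard.writeText(selection)
+      .catch(function() { vt100.internalClipboard = selection; });
+  } else {
+    vt100.internalClipboard = selection;
+  }
+}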
+
+VT100.prototype.copyLast = function() {
+  // Opening the context menu can remove the selection. We try to prevent this
+  // from happening, but that is not possible for all browsers. So, instead,
+  // we compute the selection before showing the menu.
+  this.copy(this.lastSelection);
+};
+
+VT100.prototype.pasteFnc = function() {
+  var clipboard     = undefined;
+  if (this.internalClipboard != undefined) {
+    clipboard       = this.internalClipboard;
+  } else {
+    try {
+      this.cliphelper.value = '';
+      this.cliphelper.createTextRange().execCommand('paste');
+      clipboard     = this.cliphelper.value;
+    } catch (e) {
+    }
+  }
+  this.cliphelper.value = '';
+  if (clipboard && this.menu.style.visibility == 'hidden') {
+    return function() {
+      this.keysPressed('' + clipboard);
+    };
+  } else {
+    return undefined;
+  }
+};
+
+VT100.prototype.pasteBrowserFnc = function() {
+  var clipboard     = prompt("Paste into this box:","");
+  if (clipboard != undefined) {
+     return this.keysPressed('' + clipboard);
+  }
+};
+
+VT100.prototype.toggleUTF = function() {
+  this.utfEnabled   = !this.utfEnabled;
+
+  // We always persist the last value that the user selected. Not necessarily
+  // the last value that a random program requested.
+  this.utfPreferred = this.utfEnabled;
+};
+
+VT100.prototype.toggleBell = function() {
+  this.visualBell = !this.visualBell;
+};
+
+VT100.prototype.toggleSoftKeyboard = function() {
+  this.softKeyboard = !this.softKeyboard;
+  this.keyboardImage.style.visibility = this.softKeyboard ? 'visible' : '';
+};
+
+VT100.prototype.deselectKeys = function(elem) {
+  if (elem && elem.className == 'selected') {
+    elem.className = '';
+  }
+  for (elem = elem.firstChild; elem; elem = elem.nextSibling) {
+    this.deselectKeys(elem);
+  }
+};
+
+VT100.prototype.showSoftKeyboard = function() {
+  // Make sure no key is currently selected
+  this.lastSelectedKey           = undefined;
+  this.deselectKeys(this.keyboard);
+  this.isShift                   = false;
+  this.showShiftState(false);
+  this.isCtrl                    = false;
+  this.showCtrlState(false);
+  this.isAlt                     = false;
+  this.showAltState(false);
+
+  this.keyboard.style.left       = '0px';
+  this.keyboard.style.top        = '0px';
+  this.keyboard.style.width      = this.container.offsetWidth  + 'px';
+  this.keyboard.style.height     = this.container.offsetHeight + 'px';
+  this.keyboard.style.visibility = 'hidden';
+  this.keyboard.style.display    = '';
+
+  var kbd                        = this.keyboard.firstChild;
+  var scale                      = 1.0;
+  var transform                  = this.getTransformName();
+  if (transform) {
+    kbd.style[transform]         = '';
+    if (kbd.offsetWidth > 0.9 * this.container.offsetWidth) {
+      scale                      = (kbd.offsetWidth/
+                                    this.container.offsetWidth)/0.9;
+    }
+    if (kbd.offsetHeight > 0.9 * this.container.offsetHeight) {
+      scale                      = Math.max(scale,
+                                            (kbd.offsetHeight/
+                                             this.container.offsetHeight)/0.9);
+    }
+    var style                    = this.getTransformStyle(transform,
+                                              scale > 1.0 ? scale : undefined);
+    kbd.style[transform]         = style;
+  }
+  if (transform == 'filter') {
+    scale                        = 1.0;
+  }
+  kbd.style.left                 = ((this.container.offsetWidth -
+                                     kbd.offsetWidth/scale)/2) + 'px';
+  kbd.style.top                  = ((this.container.offsetHeight -
+                                     kbd.offsetHeight/scale)/2) + 'px';
+
+  this.keyboard.style.visibility = 'visible';
+};
+
+VT100.prototype.hideSoftKeyboard = function() {
+  this.keyboard.style.display    = 'none';
+};
+
+VT100.prototype.toggleCursorBlinking = function() {
+  this.blinkingCursor = !this.blinkingCursor;
+};
+
+VT100.prototype.about = function() {
+  alert("VT100 Terminal Emulator " + "2.10 (revision 239)" +
+        "\nCopyright 2008-2010 by Markus Gutschke\n" +
+        "For more information check http://shellinabox.com");
+};
+
+VT100.prototype.hideContextMenu = function() {
+  this.menu.style.visibility = 'hidden';
+  this.menu.style.top        = '-100px';
+  this.menu.style.left       = '-100px';
+  this.menu.style.width      = '0px';
+  this.menu.style.height     = '0px';
+};
+
+VT100.prototype.extendContextMenu = function(entries, actions) {
+};
+
+VT100.prototype.showContextMenu = function(x, y) {
+  this.menu.innerHTML         =
+    '<table class="popup" ' +
+           'cellpadding="0" cellspacing="0">' +
+      '<tr><td>' +
+        '<ul id="menuentries">' +
+          '<li id="beginclipboard">Copy</li>' +
+          '<li id="endclipboard">Paste</li>' +
+          '<li id="browserclipboard">Paste from browser</li>' +
+          '<hr />' +
+          '<li id="reset">Reset</li>' +
+          '<hr />' +
+          '<li id="beginconfig">' +
+             (this.utfEnabled ? '<img src="/webshell/enabled.gif" />' : '') +
+             'Unicode</li>' +
+          '<li>' +
+             (this.visualBell ? '<img src="/webshell/enabled.gif" />' : '') +
+             'Visual Bell</li>'+
+          '<li>' +
+             (this.softKeyboard ? '<img src="/webshell/enabled.gif" />' : '') +
+             'Onscreen Keyboard</li>' +
+          '<li id="endconfig">' +
+             (this.blinkingCursor ? '<img src="/webshell/enabled.gif" />' : '') +
+             'Blinking Cursor</li>'+
+          (this.usercss.firstChild ?
+           '<hr id="beginusercss" />' +
+           this.usercss.innerHTML +
+           '<hr id="endusercss" />' :
+           '<hr />') +
+          '<li id="about">About...</li>' +
+        '</ul>' +
+      '</td></tr>' +
+    '</table>';
+
+  var popup                   = this.menu.firstChild;
+  var menuentries             = this.getChildById(popup, 'menuentries');
+
+  // Determine menu entries that should be disabled
+  this.lastSelection          = this.selection();
+  if (!this.lastSelection.length) {
+    menuentries.firstChild.className
+                              = 'disabled';
+  }
+  var p                       = this.pasteFnc();
+  if (!p) {
+    menuentries.childNodes[1].className
+                              = 'disabled';
+  }
+
+  // Actions for default items
+  var actions                 = [ this.copyLast, p, this.pasteBrowserFnc, this.reset,
+                                  this.toggleUTF, this.toggleBell,
+                                  this.toggleSoftKeyboard,
+                                  this.toggleCursorBlinking ];
+
+  // Actions for user CSS styles (if any)
+  for (var i = 0; i < this.usercssActions.length; ++i) {
+    actions[actions.length]   = this.usercssActions[i];
+  }
+  actions[actions.length]     = this.about;
+
+  // Allow subclasses to dynamically add entries to the context menu
+  this.extendContextMenu(menuentries, actions);
+
+  // Hook up event listeners
+  for (var node = menuentries.firstChild, i = 0; node;
+       node = node.nextSibling) {
+    if (node.tagName == 'LI') {
+      if (node.className != 'disabled') {
+        this.addListener(node, 'mouseover',
+                         function(vt100, node) {
+                           return function() {
+                             node.className = 'hover';
+                           }
+                         }(this, node));
+        this.addListener(node, 'mouseout',
+                         function(vt100, node) {
+                           return function() {
+                             node.className = '';
+                           }
+                         }(this, node));
+        this.addListener(node, 'mousedown',
+                         function(vt100, action) {
+                           return function(event) {
+                             vt100.hideContextMenu();
+                             action.call(vt100);
+                             vt100.storeUserSettings();
+                             return vt100.cancelEvent(event || window.event);
+                           }
+                         }(this, actions[i]));
+        this.addListener(node, 'mouseup',
+                         function(vt100) {
+                           return function(event) {
+                             return vt100.cancelEvent(event || window.event);
+                           }
+                         }(this));
+        this.addListener(node, 'mouseclick',
+                         function(vt100) {
+                           return function(event) {
+                             return vt100.cancelEvent(event || window.event);
+                           }
+                         }(this));
+      }
+      i++;
+    }
+  }
+
+  // Position menu next to the mouse pointer
+  this.menu.style.left        = '0px';
+  this.menu.style.top         = '0px';
+  this.menu.style.width       =  this.container.offsetWidth  + 'px';
+  this.menu.style.height      =  this.container.offsetHeight + 'px';
+  popup.style.left            = '0px';
+  popup.style.top             = '0px';
+  
+  var margin                  = 2;
+  if (x + popup.clientWidth >= this.container.offsetWidth - margin) {
+    x              = this.container.offsetWidth-popup.clientWidth - margin - 1;
+  }
+  if (x < margin) {
+    x                         = margin;
+  }
+  if (y + popup.clientHeight >= this.container.offsetHeight - margin) {
+    y            = this.container.offsetHeight-popup.clientHeight - margin - 1;
+  }
+  if (y < margin) {
+    y                         = margin;
+  }
+  popup.style.left            = x + 'px';
+  popup.style.top             = y + 'px';
+
+  // Block all other interactions with the terminal emulator
+  this.addListener(this.menu, 'click', function(vt100) {
+                                         return function() {
+                                           vt100.hideContextMenu();
+                                         }
+                                       }(this));
+
+  // Show the menu
+  this.menu.style.visibility  = '';
+};
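+
+// Illustration (a minimal sketch of the closure pattern used above): each
+// listener is created by an immediately invoked function so that it captures
+// the current node and action, not the loop variables' final values.
+function exampleCapturePerIteration(items, callback) {
+  for (var i = 0; i < items.length; i++) {
+    items[i].handler = function(item) {
+      return function() { return callback(item); };
+    }(items[i]);
+  }
+}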
+
+VT100.prototype.keysPressed = function(ch) {
+  for (var i = 0; i < ch.length; i++) {
+    var c = ch.charCodeAt(i);
+    this.vt100(c >= 7 && c <= 15 ||
+               c == 24 || c == 26 || c == 27 || c >= 32
+               ? String.fromCharCode(c) : '<' + c + '>');
+  }
+};
+
+VT100.prototype.applyModifiers = function(ch, event) {
+  if (ch) {
+    if (event.ctrlKey) {
+      if (ch >= 32 && ch <= 127) {
+        // For historic reasons, some control characters are treated specially
+        switch (ch) {
+        case /* 3 */ 51: ch  =  27; break;
+        case /* 4 */ 52: ch  =  28; break;
+        case /* 5 */ 53: ch  =  29; break;
+        case /* 6 */ 54: ch  =  30; break;
+        case /* 7 */ 55: ch  =  31; break;
+        case /* 8 */ 56: ch  = 127; break;
+        case /* ? */ 63: ch  = 127; break;
+        default:         ch &=  31; break;
+        }
+      }
+    }
+    return String.fromCharCode(ch);
+  } else {
+    return undefined;
+  }
+};
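+
+// Illustration (a usage sketch; the event literal is hypothetical): with CTRL
+// held, printable characters collapse onto the C0 control range via
+// "ch &= 31", except for the historic special cases listed above.
+function exampleCtrlMappings(vt100) {
+  var ctrl = { ctrlKey: true };
+  return [
+    vt100.applyModifiers(65, ctrl),  // 'A' -> '\u0001' (65 & 31 == 1)
+    vt100.applyModifiers(51, ctrl),  // '3' -> '\u001B' (ESC, special-cased)
+    vt100.applyModifiers(63, ctrl)   // '?' -> '\u007F' (DEL, special-cased)
+  ];
+}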
+
+VT100.prototype.handleKey = function(event) {
+  // this.vt100('H: c=' + event.charCode + ', k=' + event.keyCode +
+  //            (event.shiftKey || event.ctrlKey || event.altKey ||
+  //             event.metaKey ? ', ' +
+  //             (event.shiftKey ? 'S' : '') + (event.ctrlKey ? 'C' : '') +
+  //             (event.altKey ? 'A' : '') + (event.metaKey ? 'M' : '') : '') +
+  //            '\r\n');
+  var ch, key;
+  if (typeof event.charCode != 'undefined') {
+    // non-IE keypress events have a translated charCode value. Also, our
+    // fake events generated when receiving keydown events include this data
+    // on all browsers.
+    ch                                = event.charCode;
+    key                               = event.keyCode;
+  } else {
+    // When sending a keypress event, IE includes the translated character
+    // code in the keyCode field.
+    ch                                = event.keyCode;
+    key                               = undefined;
+  }
+
+  // Apply modifier keys (ctrl and shift)
+  if (ch) {
+    key                               = undefined;
+  }
+  ch                                  = this.applyModifiers(ch, event);
+
+  // By this point, "ch" is either defined and contains the character code, or
+  // it is undefined and "key" defines the code of a function key 
+  if (ch != undefined) {
+    this.scrollable.scrollTop         = this.numScrollbackLines *
+                                        this.cursorHeight + 1;
+  } else {
+    if ((event.altKey || event.metaKey) && !event.shiftKey && !event.ctrlKey) {
+      // Many programs have difficulties dealing with parametrized escape
+      // sequences for function keys. Thus, if ALT is the only modifier
+      // key, return Emacs-style keycodes for commonly used keys.
+      switch (key) {
+      case  33: /* Page Up      */ ch = '\u001B<';                      break;
+      case  34: /* Page Down    */ ch = '\u001B>';                      break;
+      case  37: /* Left         */ ch = '\u001Bb';                      break;
+      case  38: /* Up           */ ch = '\u001Bp';                      break;
+      case  39: /* Right        */ ch = '\u001Bf';                      break;
+      case  40: /* Down         */ ch = '\u001Bn';                      break;
+      case  46: /* Delete       */ ch = '\u001Bd';                      break;
+      default:                                                          break;
+      }
+    } else if (event.shiftKey && !event.ctrlKey &&
+               !event.altKey && !event.metaKey) {
+      switch (key) {
+      case  33: /* Page Up      */ this.scrollBack();                   return;
+      case  34: /* Page Down    */ this.scrollFore();                   return;
+      default:                                                          break;
+      }
+    }
+    if (ch == undefined) {
+      switch (key) {
+      case   8: /* Backspace    */ ch = '\u007f';                       break;
+      case   9: /* Tab          */ ch = '\u0009';                       break;
+      case  10: /* Return       */ ch = '\u000A';                       break;
+      case  13: /* Enter        */ ch = this.crLfMode ?
+                                        '\r\n' : '\r';                  break;
+      case  16: /* Shift        */                                      return;
+      case  17: /* Ctrl         */                                      return;
+      case  18: /* Alt          */                                      return;
+      case  19: /* Break        */                                      return;
+      case  20: /* Caps Lock    */                                      return;
+      case  27: /* Escape       */ ch = '\u001B';                       break;
+      case  33: /* Page Up      */ ch = '\u001B[5~';                    break;
+      case  34: /* Page Down    */ ch = '\u001B[6~';                    break;
+      case  35: /* End          */ ch = '\u001BOF';                     break;
+      case  36: /* Home         */ ch = '\u001BOH';                     break;
+      case  37: /* Left         */ ch = this.cursorKeyMode ?
+                             '\u001BOD' : '\u001B[D';                   break;
+      case  38: /* Up           */ ch = this.cursorKeyMode ?
+                             '\u001BOA' : '\u001B[A';                   break;
+      case  39: /* Right        */ ch = this.cursorKeyMode ?
+                             '\u001BOC' : '\u001B[C';                   break;
+      case  40: /* Down         */ ch = this.cursorKeyMode ?
+                             '\u001BOB' : '\u001B[B';                   break;
+      case  45: /* Insert       */ ch = '\u001B[2~';                    break;
+      case  46: /* Delete       */ ch = '\u001B[3~';                    break;
+      case  91: /* Left Window  */                                      return;
+      case  92: /* Right Window */                                      return;
+      case  93: /* Select       */                                      return;
+      case  96: /* 0            */ ch = this.applyModifiers(48, event); break;
+      case  97: /* 1            */ ch = this.applyModifiers(49, event); break;
+      case  98: /* 2            */ ch = this.applyModifiers(50, event); break;
+      case  99: /* 3            */ ch = this.applyModifiers(51, event); break;
+      case 100: /* 4            */ ch = this.applyModifiers(52, event); break;
+      case 101: /* 5            */ ch = this.applyModifiers(53, event); break;
+      case 102: /* 6            */ ch = this.applyModifiers(54, event); break;
+      case 103: /* 7            */ ch = this.applyModifiers(55, event); break;
+      case 104: /* 8            */ ch = this.applyModifiers(56, event); break;
+      case 105: /* 9            */ ch = this.applyModifiers(57, event); break;
+      case 106: /* *            */ ch = this.applyModifiers(42, event); break;
+      case 107: /* +            */ ch = this.applyModifiers(43, event); break;
+      case 109: /* -            */ ch = this.applyModifiers(45, event); break;
+      case 110: /* .            */ ch = this.applyModifiers(46, event); break;
+      case 111: /* /            */ ch = this.applyModifiers(47, event); break;
+      case 112: /* F1           */ ch = '\u001BOP';                     break;
+      case 113: /* F2           */ ch = '\u001BOQ';                     break;
+      case 114: /* F3           */ ch = '\u001BOR';                     break;
+      case 115: /* F4           */ ch = '\u001BOS';                     break;
+      case 116: /* F5           */ ch = '\u001B[15~';                   break;
+      case 117: /* F6           */ ch = '\u001B[17~';                   break;
+      case 118: /* F7           */ ch = '\u001B[18~';                   break;
+      case 119: /* F8           */ ch = '\u001B[19~';                   break;
+      case 120: /* F9           */ ch = '\u001B[20~';                   break;
+      case 121: /* F10          */ ch = '\u001B[21~';                   break;
+      case 122: /* F11          */ ch = '\u001B[23~';                   break;
+      case 123: /* F12          */ ch = '\u001B[24~';                   break;
+      case 144: /* Num Lock     */                                      return;
+      case 145: /* Scroll Lock  */                                      return;
+      case 186: /* ;            */ ch = this.applyModifiers(59, event); break;
+      case 187: /* =            */ ch = this.applyModifiers(61, event); break;
+      case 188: /* ,            */ ch = this.applyModifiers(44, event); break;
+      case 189: /* -            */ ch = this.applyModifiers(45, event); break;
+      case 173: /* -            */ ch = this.applyModifiers(45, event); break; // FF15 Patch
+      case 190: /* .            */ ch = this.applyModifiers(46, event); break;
+      case 191: /* /            */ ch = this.applyModifiers(47, event); break;
+      // Conflicts with dead key " on Swiss keyboards
+      //case 192: /* `            */ ch = this.applyModifiers(96, event); break;
+      // Conflicts with dead key " on Swiss keyboards
+      //case 219: /* [            */ ch = this.applyModifiers(91, event); break;
+      case 220: /* \            */ ch = this.applyModifiers(92, event); break;
+      // Conflicts with dead key ^ and ` on Swiss keyboards
+      //                         ^ and " on French keyboards
+      //case 221: /* ]            */ ch = this.applyModifiers(93, event); break;
+      case 222: /* '            */ ch = this.applyModifiers(39, event); break;
+      default:                                                          return;
+      }
+      this.scrollable.scrollTop       = this.numScrollbackLines *
+                                        this.cursorHeight + 1;
+    }
+  }
+
+  // "ch" now contains the sequence of keycodes to send. But we might still
+  // have to apply the effects of modifier keys.
+  if (event.shiftKey || event.ctrlKey || event.altKey || event.metaKey) {
+    var start, digit, part1, part2;
+    if ((start = ch.substr(0, 2)) == '\u001B[') {
+      for (part1 = start;
+           part1.length < ch.length &&
+             (digit = ch.charCodeAt(part1.length)) >= 48 && digit <= 57; ) {
+        part1                         = ch.substr(0, part1.length + 1);
+      }
+      part2                           = ch.substr(part1.length);
+      if (part1.length > 2) {
+        part1                        += ';';
+      }
+    } else if (start == '\u001BO') {
+      part1                           = start;
+      part2                           = ch.substr(2);
+    }
+    if (part1 != undefined) {
+      ch                              = part1                                 +
+                                       ((event.shiftKey             ? 1 : 0)  +
+                                        (event.altKey|event.metaKey ? 2 : 0)  +
+                                        (event.ctrlKey              ? 4 : 0)) +
+                                        part2;
+    } else if (ch.length == 1 && (event.altKey || event.metaKey)) {
+      ch                              = '\u001B' + ch;
+    }
+  }
+
+  if (this.menu.style.visibility == 'hidden') {
+    // this.vt100('R: c=');
+    // for (var i = 0; i < ch.length; i++)
+    //   this.vt100((i != 0 ? ', ' : '') + ch.charCodeAt(i));
+    // this.vt100('\r\n');
+    this.keysPressed(ch);
+  }
+};
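+
+// Illustration (a worked sketch of the modifier encoding above, traced by
+// hand): for CSI sequences that already carry a numeric parameter the bitmask
+// shift=1, alt/meta=2, ctrl=4 is appended after a ';'; for bare sequences it
+// is inserted before the final byte.
+function exampleModifierEncoding() {
+  return [
+    // Ctrl+PageUp: '\u001B[5~' -> part1 '\u001B[5;', bitmask 4, part2 '~'
+    '\u001B[5;4~',
+    // Shift+Up (cursor key mode off): '\u001B[A' -> '\u001B[' + 1 + 'A'
+    '\u001B[1A'
+  ];
+}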
+
+VT100.prototype.inspect = function(o, d) {
+  if (d == undefined) {
+    d       = 0;
+  }
+  var rc    = '';
+  if (typeof o == 'object' && ++d < 2) {
+    rc      = '[\r\n';
+    for (var i in o) {
+      rc   += this.spaces(d * 2) + i + ' -> ';
+      try {
+        rc += this.inspect(o[i], d);
+      } catch (e) {
+        rc += '?' + '?' + '?\r\n';
+      }
+    }
+    rc     += ']\r\n';
+  } else {
+    rc     += ('' + o).replace(/\n/g, ' ').replace(/ +/g,' ') + '\r\n';
+  }
+  return rc;
+};
+
+VT100.prototype.checkComposedKeys = function(event) {
+  // Composed keys (at least on Linux) do not generate normal events.
+  // Instead, they get entered into the text field. We normally catch
+  // this on the next keyup event.
+  var s              = this.input.value;
+  if (s.length) {
+    this.input.value = '';
+    if (this.menu.style.visibility == 'hidden') {
+      this.keysPressed(s);
+    }
+  }
+};
+
+VT100.prototype.fixEvent = function(event) {
+  // Some browsers report AltGR as a combination of ALT and CTRL. As AltGr
+  // is used as a second-level selector, clear the modifier bits before
+  // handling the event.
+  if (event.ctrlKey && event.altKey) {
+    var fake                = { };
+    fake.charCode           = event.charCode;
+    fake.keyCode            = event.keyCode;
+    fake.ctrlKey            = false;
+    fake.shiftKey           = event.shiftKey;
+    fake.altKey             = false;
+    fake.metaKey            = event.metaKey;
+    return fake;
+  }
+
+  // Some browsers fail to translate keys, if both shift and alt/meta is
+  // pressed at the same time. We try to translate those cases, but that
+  // only works for US keyboard layouts.
+  if (event.shiftKey) {
+    var u                   = undefined;
+    var s                   = undefined;
+    switch (this.lastNormalKeyDownEvent.keyCode) {
+    case  39: /* ' -> " */ u = 39; s =  34; break;
+    case  44: /* , -> < */ u = 44; s =  60; break;
+    case  45: /* - -> _ */ u = 45; s =  95; break;
+    case  46: /* . -> > */ u = 46; s =  62; break;
+    case  47: /* / -> ? */ u = 47; s =  63; break;
+
+    case  48: /* 0 -> ) */ u = 48; s =  41; break;
+    case  49: /* 1 -> ! */ u = 49; s =  33; break;
+    case  50: /* 2 -> @ */ u = 50; s =  64; break;
+    case  51: /* 3 -> # */ u = 51; s =  35; break;
+    case  52: /* 4 -> $ */ u = 52; s =  36; break;
+    case  53: /* 5 -> % */ u = 53; s =  37; break;
+    case  54: /* 6 -> ^ */ u = 54; s =  94; break;
+    case  55: /* 7 -> & */ u = 55; s =  38; break;
+    case  56: /* 8 -> * */ u = 56; s =  42; break;
+    case  57: /* 9 -> ( */ u = 57; s =  40; break;
+
+    case  59: /* ; -> : */ u = 59; s =  58; break;
+    case  61: /* = -> + */ u = 61; s =  43; break;
+    case  91: /* [ -> { */ u = 91; s = 123; break;
+    case  92: /* \ -> | */ u = 92; s = 124; break;
+    case  93: /* ] -> } */ u = 93; s = 125; break; 
+    case  96: /* ` -> ~ */ u = 96; s = 126; break;
+
+    case 109: /* - -> _ */ u = 45; s =  95; break;
+    case 111: /* / -> ? */ u = 47; s =  63; break;
+
+    case 186: /* ; -> : */ u = 59; s =  58; break;
+    case 187: /* = -> + */ u = 61; s =  43; break;
+    case 188: /* , -> < */ u = 44; s =  60; break;
+    case 189: /* - -> _ */ u = 45; s =  95; break;
+    case 173: /* - -> _ */ u = 45; s =  95; break; // FF15 Patch
+    case 190: /* . -> > */ u = 46; s =  62; break;
+    case 191: /* / -> ? */ u = 47; s =  63; break;
+    case 192: /* ` -> ~ */ u = 96; s = 126; break;
+    case 219: /* [ -> { */ u = 91; s = 123; break;
+    case 220: /* \ -> | */ u = 92; s = 124; break;
+    case 221: /* ] -> } */ u = 93; s = 125; break; 
+    case 222: /* ' -> " */ u = 39; s =  34; break;
+    default:                                break;
+    }
+    if (s && (event.charCode == u || event.charCode == 0)) {
+      var fake              = { };
+      fake.charCode         = s;
+      fake.keyCode          = event.keyCode;
+      fake.ctrlKey          = event.ctrlKey;
+      fake.shiftKey         = event.shiftKey;
+      fake.altKey           = event.altKey;
+      fake.metaKey          = event.metaKey;
+      return fake;
+    }
+  }
+  return event;
+};
+
+VT100.prototype.keyDown = function(event) {
+  // this.vt100('D: c=' + event.charCode + ', k=' + event.keyCode +
+  //            (event.shiftKey || event.ctrlKey || event.altKey ||
+  //             event.metaKey ? ', ' +
+  //             (event.shiftKey ? 'S' : '') + (event.ctrlKey ? 'C' : '') +
+  //             (event.altKey ? 'A' : '') + (event.metaKey ? 'M' : '') : '') +
+  //            '\r\n');
+  this.checkComposedKeys(event);
+  this.lastKeyPressedEvent      = undefined;
+  this.lastKeyDownEvent         = undefined;
+  this.lastNormalKeyDownEvent   = event;
+
+  // Swiss keyboard conflicts:
+  // [ 59
+  // ] 192
+  // ' 219 (dead key)
+  // { 220
+  // ~ 221 (dead key)
+  // } 223
+  // French keyboard conflicts:
+  // ~ 50 (dead key)
+  // } 107
+  var asciiKey                  =
+    event.keyCode ==  32                         ||
+    event.keyCode >=  48 && event.keyCode <=  57 ||
+    event.keyCode >=  65 && event.keyCode <=  90;
+  var alphNumKey                =
+    asciiKey                                     ||
+    event.keyCode ==  59 ||
+    event.keyCode >=  96 && event.keyCode <= 105 ||
+    event.keyCode == 107 ||
+    event.keyCode == 192 ||
+    event.keyCode >= 219 && event.keyCode <= 221 ||
+    event.keyCode == 223 ||
+    event.keyCode == 226;
+  var normalKey                 =
+    alphNumKey                                   ||
+    event.keyCode ==  61 ||
+    event.keyCode == 106 ||
+    event.keyCode >= 109 && event.keyCode <= 111 ||
+    event.keyCode >= 186 && event.keyCode <= 191 ||
+    event.keyCode == 222 ||
+    event.keyCode == 252;
+  try {
+    if (navigator.appName == 'Konqueror') {
+      normalKey                |= event.keyCode < 128;
+    }
+  } catch (e) {
+  }
+
+  // We normally prefer to look at keypress events, as they perform the
+  // translation from keyCode to charCode. This is important, as the
+  // translation is locale-dependent.
+  // But for some keys, we must intercept them during the keydown event,
+  // as they would otherwise get interpreted by the browser.
+  // Even, when doing all of this, there are some keys that we can never
+  // intercept. This applies to some of the menu navigation keys in IE.
+  // In fact, we see them, but we cannot stop IE from seeing them, too.
+  if ((event.charCode || event.keyCode) &&
+      ((alphNumKey && (event.ctrlKey || event.altKey || event.metaKey) &&
+        !event.shiftKey &&
+        // Some browsers signal AltGR as both CTRL and ALT. Do not try to
+        // interpret this sequence ourselves, as some keyboard layouts use
+        // it for second-level layouts.
+        !(event.ctrlKey && event.altKey)) ||
+       this.catchModifiersEarly && normalKey && !alphNumKey &&
+       (event.ctrlKey || event.altKey || event.metaKey) ||
+       !normalKey)) {
+    this.lastKeyDownEvent       = event;
+    var fake                    = { };
+    fake.ctrlKey                = event.ctrlKey;
+    fake.shiftKey               = event.shiftKey;
+    fake.altKey                 = event.altKey;
+    fake.metaKey                = event.metaKey;
+    if (asciiKey) {
+      fake.charCode             = event.keyCode;
+      fake.keyCode              = 0;
+    } else {
+      fake.charCode             = 0;
+      fake.keyCode              = event.keyCode;
+      if (!alphNumKey && event.shiftKey) {
+        fake                    = this.fixEvent(fake);
+      }
+    }
+
+    this.handleKey(fake);
+    this.lastNormalKeyDownEvent = undefined;
+
+    try {
+      // For non-IE browsers
+      event.stopPropagation();
+      event.preventDefault();
+    } catch (e) {
+    }
+    try {
+      // For IE
+      event.cancelBubble = true;
+      event.returnValue  = false;
+      event.keyCode      = 0;
+    } catch (e) {
+    }
+
+    return false;
+  }
+  return true;
+};
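+
+// Illustration (a hedged sketch of the strategy above): keys that the browser
+// would otherwise act on (function keys, most ctrl/alt chords) are synthesized
+// into fake keypress-style events during keydown; everything else waits for
+// the real keypress event, which carries the locale-dependent charCode.
+function exampleFakeKeypress(keyCode, isAscii) {
+  return {
+    charCode: isAscii ? keyCode : 0,
+    keyCode:  isAscii ? 0 : keyCode,
+    ctrlKey:  false, shiftKey: false, altKey: false, metaKey: false
+  };
+}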
+
+VT100.prototype.keyPressed = function(event) {
+  // this.vt100('P: c=' + event.charCode + ', k=' + event.keyCode +
+  //            (event.shiftKey || event.ctrlKey || event.altKey ||
+  //             event.metaKey ? ', ' +
+  //             (event.shiftKey ? 'S' : '') + (event.ctrlKey ? 'C' : '') +
+  //             (event.altKey ? 'A' : '') + (event.metaKey ? 'M' : '') : '') +
+  //            '\r\n');
+  if (this.lastKeyDownEvent) {
+    // If we already processed the key on keydown, do not process it
+    // again here. Ideally, the browser should not even have generated a
+    // keypress event in this case. But that does not appear to always work.
+    this.lastKeyDownEvent     = undefined;
+  } else {
+    this.handleKey(event.altKey || event.metaKey
+                   ? this.fixEvent(event) : event);
+  }
+
+  try {
+    // For non-IE browsers
+    event.preventDefault();
+  } catch (e) {
+  }
+
+  try {
+    // For IE
+    event.cancelBubble = true;
+    event.returnValue  = false;
+    event.keyCode      = 0;
+  } catch (e) {
+  }
+
+  this.lastNormalKeyDownEvent = undefined;
+  this.lastKeyPressedEvent    = event;
+  return false;
+};
+
+VT100.prototype.keyUp = function(event) {
+  // this.vt100('U: c=' + event.charCode + ', k=' + event.keyCode +
+  //            (event.shiftKey || event.ctrlKey || event.altKey ||
+  //             event.metaKey ? ', ' +
+  //             (event.shiftKey ? 'S' : '') + (event.ctrlKey ? 'C' : '') +
+  //             (event.altKey ? 'A' : '') + (event.metaKey ? 'M' : '') : '') +
+  //            '\r\n');
+  if (this.lastKeyPressedEvent) {
+    // The compose key on Linux occasionally confuses the browser and keeps
+    // inserting bogus characters into the input field, even if just a regular
+    // key has been pressed. Detect this case and drop the bogus characters.
+    (event.target ||
+     event.srcElement).value      = '';
+  } else {
+    // This is usually where we notice that a key has been composed and
+    // thus failed to generate normal events.
+    this.checkComposedKeys(event);
+
+    // Some browsers don't report keypress events if ctrl or alt is pressed
+    // for non-alphanumerical keys. Patch things up for now, but in the
+    // future we will catch these keys earlier (in the keydown handler).
+    if (this.lastNormalKeyDownEvent) {
+      // this.vt100('ENABLING EARLY CATCHING OF MODIFIER KEYS\r\n');
+      this.catchModifiersEarly    = true;
+      var asciiKey                =
+        event.keyCode ==  32                         ||
+        // Conflicts with dead key ~ (code 50) on French keyboards
+        //event.keyCode >=  48 && event.keyCode <=  57 ||
+        event.keyCode >=  48 && event.keyCode <=  49 ||
+        event.keyCode >=  51 && event.keyCode <=  57 ||
+        event.keyCode >=  65 && event.keyCode <=  90;
+      var alphNumKey              =
+        asciiKey                                     ||
+        event.keyCode ==  50                         ||
+        event.keyCode >=  96 && event.keyCode <= 105;
+      var normalKey               =
+        alphNumKey                                   ||
+        event.keyCode ==  59 || event.keyCode ==  61 ||
+        event.keyCode == 106 || event.keyCode == 107 ||
+        event.keyCode >= 109 && event.keyCode <= 111 ||
+        event.keyCode >= 186 && event.keyCode <= 192 ||
+        event.keyCode >= 219 && event.keyCode <= 223 ||
+        event.keyCode == 252;
+      var fake                    = { };
+      fake.ctrlKey                = event.ctrlKey;
+      fake.shiftKey               = event.shiftKey;
+      fake.altKey                 = event.altKey;
+      fake.metaKey                = event.metaKey;
+      if (asciiKey) {
+        fake.charCode             = event.keyCode;
+        fake.keyCode              = 0;
+      } else {
+        fake.charCode             = 0;
+        fake.keyCode              = event.keyCode;
+        if (!alphNumKey && (event.ctrlKey || event.altKey || event.metaKey)) {
+          fake                    = this.fixEvent(fake);
+        }
+      }
+      this.lastNormalKeyDownEvent = undefined;
+      this.handleKey(fake);
+    }
+  }
+
+  try {
+    // For IE
+    event.cancelBubble            = true;
+    event.returnValue             = false;
+    event.keyCode                 = 0;
+  } catch (e) {
+  }
+
+  this.lastKeyDownEvent           = undefined;
+  this.lastKeyPressedEvent        = undefined;
+  return false;
+};
+
+VT100.prototype.animateCursor = function(inactive) {
+  if (!this.cursorInterval) {
+    this.cursorInterval       = setInterval(
+      function(vt100) {
+        return function() {
+          vt100.animateCursor();
+
+          // Use this opportunity to check whether the user entered a composed
+          // key, or whether somebody pasted text into the textfield.
+          vt100.checkComposedKeys();
+        }
+      }(this), 500);
+  }
+  if (inactive != undefined || this.cursor.className != 'inactive') {
+    if (inactive) {
+      this.cursor.className   = 'inactive';
+    } else {
+      if (this.blinkingCursor) {
+        this.cursor.className = this.cursor.className == 'bright'
+                                ? 'dim' : 'bright';
+      } else {
+        this.cursor.className = 'bright';
+      }
+    }
+  }
+};
+
+VT100.prototype.blurCursor = function() {
+  this.animateCursor(true);
+};
+
+VT100.prototype.focusCursor = function() {
+  this.animateCursor(false);
+};
+
+VT100.prototype.flashScreen = function() {
+  this.isInverted       = !this.isInverted;
+  this.refreshInvertedState();
+  this.isInverted       = !this.isInverted;
+  setTimeout(function(vt100) {
+               return function() {
+                 vt100.refreshInvertedState();
+               };
+             }(this), 100);
+};
+
+VT100.prototype.beep = function() {
+  if (this.visualBell) {
+    this.flashScreen();
+  } else {
+    try {
+      this.beeper.Play();
+    } catch (e) {
+      try {
+        this.beeper.src = 'beep.wav';
+      } catch (e) {
+      }
+    }
+  }
+};
+
+VT100.prototype.bs = function() {
+  if (this.cursorX > 0) {
+    this.gotoXY(this.cursorX - 1, this.cursorY);
+    this.needWrap = false;
+  }
+};
+
+VT100.prototype.ht = function(count) {
+  if (count == undefined) {
+    count        = 1;
+  }
+  var cx         = this.cursorX;
+  while (count-- > 0) {
+    while (cx++ < this.terminalWidth) {
+      var tabState = this.userTabStop[cx];
+      if (tabState == false) {
+        // Explicitly cleared tab stop
+        continue;
+      } else if (tabState) {
+        // Explicitly set tab stop
+        break;
+      } else {
+        // Default tab stop at each eighth column
+        if (cx % 8 == 0) {
+          break;
+        }
+      }
+    }
+  }
+  if (cx > this.terminalWidth - 1) {
+    cx           = this.terminalWidth - 1;
+  }
+  if (cx != this.cursorX) {
+    this.gotoXY(cx, this.cursorY);
+  }
+};
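+
+// Illustration (a sketch of the tri-state tab table consumed above): false
+// marks an explicitly cleared stop, a truthy value an explicitly set stop,
+// and undefined falls back to the default stop at every eighth column.
+function exampleTabStops() {
+  var stops = [];
+  stops[4]  = true;   // explicitly set a stop in column 4
+  stops[8]  = false;  // explicitly clear the default stop in column 8
+  // With ht() above, a tab from column 0 lands on column 4, and the next tab
+  // skips the cleared column 8 and lands on the default stop in column 16.
+  return stops;
+}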
+
+VT100.prototype.rt = function(count) {
+  if (count == undefined) {
+    count          = 1;
+  }
+  var cx           = this.cursorX;
+  while (count-- > 0) {
+    while (cx-- > 0) {
+      var tabState = this.userTabStop[cx];
+      if (tabState == false) {
+        // Explicitly cleared tab stop
+        continue;
+      } else if (tabState) {
+        // Explicitly set tab stop
+        break;
+      } else {
+        // Default tab stop at each eighth column
+        if (cx % 8 == 0) {
+          break;
+        }
+      }
+    }
+  }
+  if (cx < 0) {
+    cx             = 0;
+  }
+  if (cx != this.cursorX) {
+    this.gotoXY(cx, this.cursorY);
+  }
+};
+
+VT100.prototype.cr = function() {
+  this.gotoXY(0, this.cursorY);
+  this.needWrap = false;
+};
+
+VT100.prototype.lf = function(count) {
+  if (count == undefined) {
+    count    = 1;
+  } else {
+    if (count > this.terminalHeight) {
+      count  = this.terminalHeight;
+    }
+    if (count < 1) {
+      count  = 1;
+    }
+  }
+  while (count-- > 0) {
+    if (this.cursorY == this.bottom - 1) {
+      this.scrollRegion(0, this.top + 1,
+                        this.terminalWidth, this.bottom - this.top - 1,
+                        0, -1, this.color, this.style);
+    } else if (this.cursorY < this.terminalHeight - 1) {
+      this.gotoXY(this.cursorX, this.cursorY + 1);
+    }
+  }
+};
+
+VT100.prototype.ri = function(count) {
+  if (count == undefined) {
+    count   = 1;
+  } else {
+    if (count > this.terminalHeight) {
+      count = this.terminalHeight;
+    }
+    if (count < 1) {
+      count = 1;
+    }
+  }
+  while (count-- > 0) {
+    if (this.cursorY == this.top) {
+      this.scrollRegion(0, this.top,
+                        this.terminalWidth, this.bottom - this.top - 1,
+                        0, 1, this.color, this.style);
+    } else if (this.cursorY > 0) {
+      this.gotoXY(this.cursorX, this.cursorY - 1);
+    }
+  }
+  this.needWrap = false;
+};
+
+VT100.prototype.respondID = function() {
+  this.respondString += '\u001B[?6c';
+};
+
+VT100.prototype.respondSecondaryDA = function() {
+  this.respondString += '\u001B[>0;0;0c';
+};
+
+VT100.prototype.updateStyle = function() {
+  this.style   = '';
+  if (this.attr & 0x0200 /* ATTR_UNDERLINE */) {
+    this.style = 'text-decoration: underline;';
+  }
+  var bg       = (this.attr >> 4) & 0xF;
+  var fg       =  this.attr       & 0xF;
+  if (this.attr & 0x0100 /* ATTR_REVERSE */) {
+    var tmp    = bg;
+    bg         = fg;
+    fg         = tmp;
+  }
+  if ((this.attr & (0x0100 /* ATTR_REVERSE */ | 0x0400 /* ATTR_DIM */)) == 0x0400 /* ATTR_DIM */) {
+    fg         = 8; // Dark grey
+  } else if (this.attr & 0x0800 /* ATTR_BRIGHT */) {
+    fg        |= 8;
+    this.style += 'font-weight: bold;';
+  }
+  if (this.attr & 0x1000 /* ATTR_BLINK */) {
+    this.style += 'text-decoration: blink;';
+  }
+  this.color   = 'ansi' + fg + ' bgAnsi' + bg;
+};
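+
+// Illustration (a worked sketch of the attribute layout decoded above): the
+// low byte packs the colors (foreground in bits 0-3, background in bits 4-7)
+// and the high byte packs the flags (0x0100 reverse, 0x0200 underline,
+// 0x0400 dim, 0x0800 bright, 0x1000 blink).
+function exampleAttrDecode() {
+  var attr = 0x0800 | 0x00F0 | 0x0001;  // bright red on the default background
+  var bg   = (attr >> 4) & 0xF;         // 15
+  var fg   = (attr & 0xF) | 8;          // 1 | 8 == 9 (bright red)
+  return 'ansi' + fg + ' bgAnsi' + bg;  // "ansi9 bgAnsi15", rendered bold
+}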
+
+VT100.prototype.setAttrColors = function(attr) {
+  if (attr != this.attr) {
+    this.attr = attr;
+    this.updateStyle();
+  }
+};
+
+VT100.prototype.saveCursor = function() {
+  this.savedX[this.currentScreen]     = this.cursorX;
+  this.savedY[this.currentScreen]     = this.cursorY;
+  this.savedAttr[this.currentScreen]  = this.attr;
+  this.savedUseGMap                   = this.useGMap;
+  for (var i = 0; i < 4; i++) {
+    this.savedGMap[i]                 = this.GMap[i];
+  }
+  this.savedValid[this.currentScreen] = true;
+};
+
+VT100.prototype.restoreCursor = function() {
+  if (!this.savedValid[this.currentScreen]) {
+    return;
+  }
+  this.attr      = this.savedAttr[this.currentScreen];
+  this.updateStyle();
+  this.useGMap   = this.savedUseGMap;
+  for (var i = 0; i < 4; i++) {
+    this.GMap[i] = this.savedGMap[i];
+  }
+  this.translate = this.GMap[this.useGMap];
+  this.needWrap  = false;
+  this.gotoXY(this.savedX[this.currentScreen],
+              this.savedY[this.currentScreen]);
+};
+
+VT100.prototype.getTransformName = function() {
+  var styles = [ 'transform', 'WebkitTransform', 'MozTransform', 'filter' ];
+  for (var i = 0; i < styles.length; ++i) {
+    if (typeof this.console[0].style[styles[i]] != 'undefined') {
+      return styles[i];
+    }
+  }
+  return undefined;
+};
+
+VT100.prototype.getTransformStyle = function(transform, scale) {
+  return scale && scale != 1.0
+    ? transform == 'filter'
+      ? 'progid:DXImageTransform.Microsoft.Matrix(' +
+                                 'M11=' + (1.0/scale) + ',M12=0,M21=0,M22=1,' +
+                                 "sizingMethod='auto expand')"
+      : 'translateX(-50%) ' +
+        'scaleX(' + (1.0/scale) + ') ' +
+        'translateX(50%)'
+    : '';
+};
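+
+// Illustration (a usage sketch): 132-column mode renders the console 1.65x too
+// wide and then squeezes it horizontally. The translateX(-50%)/translateX(50%)
+// pair keeps the scaled element centered; on old IE the same squeeze becomes a
+// DXImageTransform matrix with M11 = 1/scale.
+function exampleNarrowTo132Columns(vt100) {
+  var transform = vt100.getTransformName();
+  return transform ? vt100.getTransformStyle(transform, 1.65) : '';
+}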
+
+VT100.prototype.set80_132Mode = function(state) {
+  var transform                  = this.getTransformName();
+  if (transform) {
+    if ((this.console[this.currentScreen].style[transform] != '') == state) {
+      return;
+    }
+    var style                    = state ?
+                                   this.getTransformStyle(transform, 1.65):'';
+    this.console[this.currentScreen].style[transform] = style;
+    this.cursor.style[transform] = style;
+    this.space.style[transform]  = style;
+    this.scale                   = state ? 1.65 : 1.0;
+    if (transform == 'filter') {
+      this.console[this.currentScreen].style.width = state ? '165%' : '';
+    }
+    this.resizer();
+  }
+};
+
+VT100.prototype.setMode = function(state) {
+  for (var i = 0; i <= this.npar; i++) {
+    if (this.isQuestionMark) {
+      switch (this.par[i]) {
+      case  1: this.cursorKeyMode      = state;                      break;
+      case  3: this.set80_132Mode(state);                            break;
+      case  5: this.isInverted = state; this.refreshInvertedState(); break;
+      case  6: this.offsetMode         = state;                      break;
+      case  7: this.autoWrapMode       = state;                      break;
+      case 1000:
+      case  9: this.mouseReporting     = state;                      break;
+      case 25: this.cursorNeedsShowing = state;
+               if (state) { this.showCursor(); }
+               else       { this.hideCursor(); }                     break;
+      case 1047:
+      case 1049:
+      case 47: this.enableAlternateScreen(state);                    break;
+      default:                                                       break;
+      }
+    } else {
+      switch (this.par[i]) {
+      case  3: this.dispCtrl           = state;                      break;
+      case  4: this.insertMode         = state;                      break;
+      case  20: this.crLfMode          = state;                      break;
+      default:                                                       break;
+      }
+    }
+  }
+};
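+
+// Illustration (a sketch; the sequences are standard DEC/xterm private modes):
+// the '?' selects the private-mode table above, the parameter selects the
+// case, and a final 'h' or 'l' supplies the state argument.
+function examplePrivateModes() {
+  return {
+    showCursor:      '\u001B[?25h',   // case 25, state true
+    hideCursor:      '\u001B[?25l',   // case 25, state false
+    alternateScreen: '\u001B[?1049h', // case 1049 -> enableAlternateScreen(true)
+    mainScreen:      '\u001B[?1049l'  // case 1049 -> enableAlternateScreen(false)
+  };
+}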
+
+VT100.prototype.statusReport = function() {
+  // Ready and operational.
+  this.respondString += '\u001B[0n';
+};
+
+VT100.prototype.cursorReport = function() {
+  this.respondString += '\u001B[' +
+                        (this.cursorY + (this.offsetMode ? this.top + 1 : 1)) +
+                        ';' +
+                        (this.cursorX + 1) +
+                        'R';
+};
+
+VT100.prototype.setCursorAttr = function(setAttr, xorAttr) {
+  // Changing of cursor color is not implemented.
+};
+
+VT100.prototype.openPrinterWindow = function() {
+  var rc            = true;
+  try {
+    if (!this.printWin || this.printWin.closed) {
+      this.printWin = window.open('', 'print-output',
+        'width=800,height=600,directories=no,location=no,menubar=yes,' +
+        'status=no,toolbar=no,titlebar=yes,scrollbars=yes,resizable=yes');
+      this.printWin.document.body.innerHTML =
+        '<link rel="stylesheet" href="' +
+          document.location.protocol + '//' + document.location.host +
+          document.location.pathname.replace(/[^/]*$/, '') +
+          'print-styles.css" type="text/css">\n' +
+        '<div id="options"><input id="autoprint" type="checkbox"' +
+          (this.autoprint ? ' checked' : '') + '>' +
+          'Automatically print page(s) when job is ready' +
+        '</input></div>\n' +
+        '<div id="spacer"><input type="checkbox">&nbsp;</input></div>' +
+        '<pre id="print"></pre>\n';
+      var autoprint = this.printWin.document.getElementById('autoprint');
+      this.addListener(autoprint, 'click',
+                       (function(vt100, autoprint) {
+                         return function() {
+                           vt100.autoprint = autoprint.checked;
+                           vt100.storeUserSettings();
+                           return false;
+                         };
+                       })(this, autoprint));
+      this.printWin.document.title = 'ShellInABox Printer Output';
+    }
+  } catch (e) {
+    // Maybe, a popup blocker prevented us from working. Better catch the
+    // exception, so that we won't break the entire terminal session. The
+    // user probably needs to disable the blocker first before retrying the
+    // operation.
+    rc              = false;
+  }
+  rc               &= this.printWin && !this.printWin.closed &&
+                      (this.printWin.innerWidth ||
+                       this.printWin.document.documentElement.clientWidth ||
+                       this.printWin.document.body.clientWidth) > 1;
+
+  if (!rc && this.printing == 100) {
+    // Different popup blockers work differently. We try to detect a couple
+    // of common methods. And then we retry again a brief amount later, as
+    // false positives are otherwise possible. If we are sure that there is
+    // a popup blocker in effect, we alert the user to it. This is helpful
+    // as some popup blockers have minimal or no UI, and the user might not
+    // notice that they are missing the popup. In any case, we only show at
+    // most one message per print job.
+    this.printing   = true;
+    setTimeout((function(win) {
+                  return function() {
+                    if (!win || win.closed ||
+                        (win.innerWidth ||
+                         win.document.documentElement.clientWidth ||
+                         win.document.body.clientWidth) <= 1) {
+                      alert('Attempted to print, but a popup blocker ' +
+                            'prevented the printer window from opening');
+                    }
+                  };
+                })(this.printWin), 2000);
+  }
+  return rc;
+};
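+
+// Illustration (a sketch of the detection used above): a window that a popup
+// blocker silently suppressed is either missing, already closed, or reports a
+// client width of at most one pixel on all three width properties.
+function exampleLooksBlocked(win) {
+  return !win || win.closed ||
+         (win.innerWidth ||
+          win.document.documentElement.clientWidth ||
+          win.document.body.clientWidth) <= 1;
+}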
+
+VT100.prototype.sendToPrinter = function(s) {
+  this.openPrinterWindow();
+  try {
+    var doc   = this.printWin.document;
+    var print = doc.getElementById('print');
+    if (print.lastChild && print.lastChild.nodeName == '#text') {
+      print.lastChild.textContent += this.replaceChar(s, ' ', '\u00A0');
+    } else {
+      print.appendChild(doc.createTextNode(this.replaceChar(s, ' ','\u00A0')));
+    }
+  } catch (e) {
+    // There probably was a more aggressive popup blocker that prevented us
+    // from accessing the printer windows.
+  }
+};
+
+VT100.prototype.sendControlToPrinter = function(ch) {
+  // We get called whenever doControl() is active. But for the printer, we
+  // only implement a basic line printer that doesn't understand most of
+  // the escape sequences of the VT100 terminal. In fact, the only escape
+  // sequence that we really need to recognize is '^[[4i' for turning the
+  // printer off.
+  try {
+    switch (ch) {
+    case  9:
+      // HT
+      this.openPrinterWindow();
+      var doc                 = this.printWin.document;
+      var print               = doc.getElementById('print');
+      var chars               = print.lastChild &&
+                                print.lastChild.nodeName == '#text' ?
+                                print.lastChild.textContent.length : 0;
+      this.sendToPrinter(this.spaces(8 - (chars % 8)));
+      break;
+    case 10:
+      // LF (ignored; the CR terminates the printed line)
+      break;
+    case 12:
+      // FF
+      this.openPrinterWindow();
+      var pageBreak           = this.printWin.document.createElement('div');
+      pageBreak.className     = 'pagebreak';
+      pageBreak.innerHTML     = '<hr />';
+      this.printWin.document.getElementById('print').appendChild(pageBreak);
+      break;
+    case 13:
+      // CR (emits the line break)
+      this.openPrinterWindow();
+      var lineBreak           = this.printWin.document.createElement('br');
+      this.printWin.document.getElementById('print').appendChild(lineBreak);
+      break;
+    case 27:
+      // ESC
+      this.isEsc              = 1 /* ESesc */;
+      break;
+    default:
+      switch (this.isEsc) {
+      case 1 /* ESesc */:
+        this.isEsc            = 0 /* ESnormal */;
+        switch (ch) {
+        case 0x5B /*[*/:
+          this.isEsc          = 2 /* ESsquare */;
+          break;
+        default:
+          break;
+        }
+        break;
+      case 2 /* ESsquare */:
+        this.npar             = 0;
+        this.par              = [ 0, 0, 0, 0, 0, 0, 0, 0,
+                                  0, 0, 0, 0, 0, 0, 0, 0 ];
+        this.isEsc            = 3 /* ESgetpars */;
+        this.isQuestionMark   = ch == 0x3F /*?*/;
+        if (this.isQuestionMark) {
+          break;
+        }
+        // Fall through
+      case 3 /* ESgetpars */: 
+        if (ch == 0x3B /*;*/) {
+          this.npar++;
+          break;
+        } else if (ch >= 0x30 /*0*/ && ch <= 0x39 /*9*/) {
+          var par             = this.par[this.npar];
+          if (par == undefined) {
+            par               = 0;
+          }
+          this.par[this.npar] = 10*par + (ch & 0xF);
+          break;
+        } else {
+          this.isEsc          = 4 /* ESgotpars */;
+        }
+        // Fall through
+      case 4 /* ESgotpars */:
+        this.isEsc            = 0 /* ESnormal */;
+        if (this.isQuestionMark) {
+          break;
+        }
+        switch (ch) {
+        case 0x69 /*i*/:
+          this.csii(this.par[0]);
+          break;
+        default:
+          break;
+        }
+        break;
+      default:
+        this.isEsc            = 0 /* ESnormal */;
+        break;
+      }
+      break;
+    }
+  } catch (e) {
+    // There probably was a more aggressive popup blocker that prevented us
+    // from accessing the printer windows.
+  }
+};
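+
+// Illustration (a hand trace of the printer state machine above): the only
+// sequence it really needs is '\u001B[4i' ("stop printing"). Fed one byte at
+// a time, the state advances ESnormal -> ESesc -> ESsquare -> ESgetpars ->
+// ESgotpars, and the final 'i' dispatches csii(4).
+function examplePrinterOff(vt100) {
+  var bytes = [ 27 /*ESC*/, 0x5B /*[*/, 0x34 /*4*/, 0x69 /*i*/ ];
+  for (var i = 0; i < bytes.length; i++) {
+    vt100.sendControlToPrinter(bytes[i]);
+  }
+}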
+
+VT100.prototype.csiAt = function(number) {
+  // Insert spaces
+  if (number == 0) {
+    number      = 1;
+  }
+  if (number > this.terminalWidth - this.cursorX) {
+    number      = this.terminalWidth - this.cursorX;
+  }
+  this.scrollRegion(this.cursorX, this.cursorY,
+                    this.terminalWidth - this.cursorX - number, 1,
+                    number, 0, this.color, this.style);
+  this.needWrap = false;
+};
+
+VT100.prototype.csii = function(number) {
+  // Printer control
+  switch (number) {
+  case 0: // Print Screen
+    window.print();
+    break;
+  case 4: // Stop printing
+    try {
+      if (this.printing && this.printWin && !this.printWin.closed) {
+        var print = this.printWin.document.getElementById('print');
+        while (print.lastChild &&
+               print.lastChild.tagName == 'DIV' &&
+               print.lastChild.className == 'pagebreak') {
+          // Remove trailing blank pages
+          print.removeChild(print.lastChild);
+        }
+        if (this.autoprint) {
+          this.printWin.print();
+        }
+      }
+    } catch (e) {
+    }
+    this.printing = false;
+    break;
+  case 5: // Start printing
+    if (!this.printing && this.printWin && !this.printWin.closed) {
+      this.printWin.document.getElementById('print').innerHTML = '';
+    }
+    this.printing = 100;
+    break;
+  default:
+    break;
+  }
+};
+
+VT100.prototype.csiJ = function(number) {
+  switch (number) {
+  case 0: // Erase from cursor to end of display
+    this.clearRegion(this.cursorX, this.cursorY,
+                     this.terminalWidth - this.cursorX, 1,
+                     this.color, this.style);
+    if (this.cursorY < this.terminalHeight-1) {
+      this.clearRegion(0, this.cursorY+1,
+                       this.terminalWidth, this.terminalHeight-this.cursorY-1,
+                       this.color, this.style);
+    }
+    break;
+  case 1: // Erase from start to cursor
+    if (this.cursorY > 0) {
+      this.clearRegion(0, 0,
+                       this.terminalWidth, this.cursorY,
+                       this.color, this.style);
+    }
+    this.clearRegion(0, this.cursorY, this.cursorX + 1, 1,
+                     this.color, this.style);
+    break;
+  case 2: // Erase whole display
+    this.clearRegion(0, 0, this.terminalWidth, this.terminalHeight,
+                     this.color, this.style);
+    break;
+  default:
+    return;
+  }
+  this.needWrap = false;
+};
+
+VT100.prototype.csiK = function(number) {
+  switch (number) {
+  case 0: // Erase from cursor to end of line
+    this.clearRegion(this.cursorX, this.cursorY,
+                     this.terminalWidth - this.cursorX, 1,
+                     this.color, this.style);
+    break;
+  case 1: // Erase from start of line to cursor
+    this.clearRegion(0, this.cursorY, this.cursorX + 1, 1,
+                     this.color, this.style);
+    break;
+  case 2: // Erase whole line
+    this.clearRegion(0, this.cursorY, this.terminalWidth, 1,
+                     this.color, this.style);
+    break;
+  default:
+    return;
+  }
+  this.needWrap = false;
+};
+
+VT100.prototype.csiL = function(number) {
+  // Open line by inserting blank line(s)
+  if (this.cursorY >= this.bottom) {
+    return;
+  }
+  if (number == 0) {
+    number = 1;
+  }
+  if (number > this.bottom - this.cursorY) {
+    number = this.bottom - this.cursorY;
+  }
+  this.scrollRegion(0, this.cursorY,
+                    this.terminalWidth, this.bottom - this.cursorY - number,
+                    0, number, this.color, this.style);
+  this.needWrap = false;
+};
+
+VT100.prototype.csiM = function(number) {
+  // Delete line(s), scrolling up the bottom of the screen.
+  if (this.cursorY >= this.bottom) {
+    return;
+  }
+  if (number == 0) {
+    number = 1;
+  }
+  if (number > this.bottom - this.cursorY) {
+    number = this.bottom - this.cursorY;
+  }
+  this.scrollRegion(0, this.cursorY + number,
+                    this.terminalWidth, this.bottom - this.cursorY - number,
+                    0, -number, this.color, this.style);
+  this.needWrap = false;
+};
+
+VT100.prototype.csim = function() {
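+  // Each parameter is an SGR code. Attributes are packed into this.attr:
+  // bits 0-3 hold the foreground color, bits 4-7 the background color, and
+  // the remaining bits are the ATTR_* flags noted inline (0x0100 reverse,
+  // 0x0200 underline, 0x0400 dim, 0x0800 bright, 0x1000 blink).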
+  for (var i = 0; i <= this.npar; i++) {
+    switch (this.par[i]) {
+    case 0:  this.attr  = 0x00F0 /* ATTR_DEFAULT */;                                break;
+    case 1:  this.attr  = (this.attr & ~0x0400 /* ATTR_DIM */)|0x0800 /* ATTR_BRIGHT */;         break;
+    case 2:  this.attr  = (this.attr & ~0x0800 /* ATTR_BRIGHT */)|0x0400 /* ATTR_DIM */;         break;
+    case 4:  this.attr |= 0x0200 /* ATTR_UNDERLINE */;                              break;
+    case 5:  this.attr |= 0x1000 /* ATTR_BLINK */;                                  break;
+    case 7:  this.attr |= 0x0100 /* ATTR_REVERSE */;                                break;
+    case 10:
+      this.translate    = this.GMap[this.useGMap];
+      this.dispCtrl     = false;
+      this.toggleMeta   = false;
+      break;
+    case 11:
+      this.translate    = this.CodePage437Map;
+      this.dispCtrl     = true;
+      this.toggleMeta   = false;
+      break;
+    case 12:
+      this.translate    = this.CodePage437Map;
+      this.dispCtrl     = true;
+      this.toggleMeta   = true;
+      break;
+    case 21:
+    case 22: this.attr &= ~(0x0800 /* ATTR_BRIGHT */|0x0400 /* ATTR_DIM */);                     break;
+    case 24: this.attr &= ~ 0x0200 /* ATTR_UNDERLINE */;                            break;
+    case 25: this.attr &= ~ 0x1000 /* ATTR_BLINK */;                                break;
+    case 27: this.attr &= ~ 0x0100 /* ATTR_REVERSE */;                              break;
+    case 38: this.attr  = (this.attr & ~(0x0400 /* ATTR_DIM */|0x0800 /* ATTR_BRIGHT */|0x0F))|
+                          0x0200 /* ATTR_UNDERLINE */;                              break;
+    case 39: this.attr &= ~(0x0400 /* ATTR_DIM */|0x0800 /* ATTR_BRIGHT */|0x0200 /* ATTR_UNDERLINE */|0x0F); break;
+    case 49: this.attr |= 0xF0;                                        break;
+    default:
+      if (this.par[i] >= 30 && this.par[i] <= 37) {
+          var fg        = this.par[i] - 30;
+          this.attr     = (this.attr & ~0x0F) | fg;
+      } else if (this.par[i] >= 40 && this.par[i] <= 47) {
+          var bg        = this.par[i] - 40;
+          this.attr     = (this.attr & ~0xF0) | (bg << 4);
+      }
+      break;
+    }
+  }
+  this.updateStyle();
+};
+
+VT100.prototype.csiP = function(number) {
+  // Delete character(s) following cursor
+  if (number == 0) {
+    number = 1;
+  }
+  if (number > this.terminalWidth - this.cursorX) {
+    number = this.terminalWidth - this.cursorX;
+  }
+  this.scrollRegion(this.cursorX + number, this.cursorY,
+                    this.terminalWidth - this.cursorX - number, 1,
+                    -number, 0, this.color, this.style);
+  this.needWrap = false;
+};
+
+VT100.prototype.csiX = function(number) {
+  // Clear characters following cursor
+  if (number == 0) {
+    number++;
+  }
+  if (number > this.terminalWidth - this.cursorX) {
+    number = this.terminalWidth - this.cursorX;
+  }
+  this.clearRegion(this.cursorX, this.cursorY, number, 1,
+                   this.color, this.style);
+  this.needWrap = false;
+};
+
+VT100.prototype.settermCommand = function() {
+  // Setterm commands are not implemented
+};
+
+VT100.prototype.doControl = function(ch) {
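+  // Execute a single control character. this.isEsc tracks the state of the
+  // escape-sequence parser (ESnormal, ESesc, ESsquare, ESgetpars, ..., as
+  // annotated inline); any printable expansion produced by the sequence is
+  // returned for the caller to feed back through vt100().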
+  if (this.printing) {
+    this.sendControlToPrinter(ch);
+    return '';
+  }
+  var lineBuf                = '';
+  switch (ch) {
+  case 0x00: /* ignored */                                              break;
+  case 0x08: this.bs();                                                 break;
+  case 0x09: this.ht();                                                 break;
+  case 0x0A:
+  case 0x0B:
+  case 0x0C:
+  case 0x84: this.lf(); if (!this.crLfMode)                             break;
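+             /* fall thru: in CR/LF mode, LF also performs a CR */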
+  case 0x0D: this.cr();                                                 break;
+  case 0x85: this.cr(); this.lf();                                      break;
+  case 0x0E: this.useGMap     = 1;
+             this.translate   = this.GMap[1];
+             this.dispCtrl    = true;                                   break;
+  case 0x0F: this.useGMap     = 0;
+             this.translate   = this.GMap[0];
+             this.dispCtrl    = false;                                  break;
+  case 0x18:
+  case 0x1A: this.isEsc       = 0 /* ESnormal */;                               break;
+  case 0x1B: this.isEsc       = 1 /* ESesc */;                                  break;
+  case 0x7F: /* ignored */                                              break;
+  case 0x88: this.userTabStop[this.cursorX] = true;                     break;
+  case 0x8D: this.ri();                                                 break;
+  case 0x8E: this.isEsc       = 18 /* ESss2 */;                                  break;
+  case 0x8F: this.isEsc       = 19 /* ESss3 */;                                  break;
+  case 0x9A: this.respondID();                                          break;
+  case 0x9B: this.isEsc       = 2 /* ESsquare */;                               break;
+  case 0x07: if (this.isEsc != 17 /* EStitle */) {
+               this.beep();                                             break;
+             }
+             /* fall thru */
+  default:   switch (this.isEsc) {
+    case 1 /* ESesc */:
+      this.isEsc              = 0 /* ESnormal */;
+      switch (ch) {
+/*%*/ case 0x25: this.isEsc   = 13 /* ESpercent */;                              break;
+/*(*/ case 0x28: this.isEsc   = 8 /* ESsetG0 */;                                break;
+/*-*/ case 0x2D:
+/*)*/ case 0x29: this.isEsc   = 9 /* ESsetG1 */;                                break;
+/*.*/ case 0x2E:
+/***/ case 0x2A: this.isEsc   = 10 /* ESsetG2 */;                                break;
+/*/*/ case 0x2F:
+/*+*/ case 0x2B: this.isEsc   = 11 /* ESsetG3 */;                                break;
+/*#*/ case 0x23: this.isEsc   = 7 /* EShash */;                                 break;
+/*7*/ case 0x37: this.saveCursor();                                     break;
+/*8*/ case 0x38: this.restoreCursor();                                  break;
+/*>*/ case 0x3E: this.applKeyMode = false;                              break;
+/*=*/ case 0x3D: this.applKeyMode = true;                               break;
+/*D*/ case 0x44: this.lf();                                             break;
+/*E*/ case 0x45: this.cr(); this.lf();                                  break;
+/*M*/ case 0x4D: this.ri();                                             break;
+/*N*/ case 0x4E: this.isEsc   = 18 /* ESss2 */;                                  break;
+/*O*/ case 0x4F: this.isEsc   = 19 /* ESss3 */;                                  break;
+/*H*/ case 0x48: this.userTabStop[this.cursorX] = true;                 break;
+/*Z*/ case 0x5A: this.respondID();                                      break;
+/*[*/ case 0x5B: this.isEsc   = 2 /* ESsquare */;                               break;
+/*]*/ case 0x5D: this.isEsc   = 15 /* ESnonstd */;                               break;
+/*c*/ case 0x63: this.reset();                                          break;
+/*g*/ case 0x67: this.flashScreen();                                    break;
+      default:                                                          break;
+      }
+      break;
+    case 15 /* ESnonstd */:
+      switch (ch) {
+/*0*/ case 0x30:
+/*1*/ case 0x31:
+/*2*/ case 0x32: this.isEsc   = 17 /* EStitle */; this.titleString = '';         break;
+/*P*/ case 0x50: this.npar    = 0; this.par = [ 0, 0, 0, 0, 0, 0, 0 ];
+                 this.isEsc   = 16 /* ESpalette */;                              break;
+/*R*/ case 0x52: // Palette support is not implemented
+                 this.isEsc   = 0 /* ESnormal */;                               break;
+      default:   this.isEsc   = 0 /* ESnormal */;                               break;
+      }
+      break;
+    case 16 /* ESpalette */:
+      if ((ch >= 0x30 /*0*/ && ch <= 0x39 /*9*/) ||
+          (ch >= 0x41 /*A*/ && ch <= 0x46 /*F*/) ||
+          (ch >= 0x61 /*a*/ && ch <= 0x66 /*f*/)) {
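+        // Convert one hex digit: (ch & 0xDF) uppercases letters, so
+        // 'A'-'F' and 'a'-'f' map to 10-15 after subtracting 55, while
+        // (ch & 0xF) maps '0'-'9' to 0-9.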
+        this.par[this.npar++] = ch > 0x39  /*9*/ ? (ch & 0xDF) - 55
+                                                : (ch & 0xF);
+        if (this.npar == 7) {
+          // Palette support is not implemented
+          this.isEsc          = 0 /* ESnormal */;
+        }
+      } else {
+        this.isEsc            = 0 /* ESnormal */;
+      }
+      break;
+    case 2 /* ESsquare */:
+      this.npar               = 0;
+      this.par                = [ 0, 0, 0, 0, 0, 0, 0, 0,
+                                  0, 0, 0, 0, 0, 0, 0, 0 ];
+      this.isEsc              = 3 /* ESgetpars */;
+/*[*/ if (ch == 0x5B) { // Function key
+        this.isEsc            = 6 /* ESfunckey */;
+        break;
+      } else {
+/*?*/   this.isQuestionMark   = ch == 0x3F;
+        if (this.isQuestionMark) {
+          break;
+        }
+      }
+      // Fall through
+    case 5 /* ESdeviceattr */:
+    case 3 /* ESgetpars */: 
+/*;*/ if (ch == 0x3B) {
+        this.npar++;
+        break;
+      } else if (ch >= 0x30 /*0*/ && ch <= 0x39 /*9*/) {
+        var par               = this.par[this.npar];
+        if (par == undefined) {
+          par                 = 0;
+        }
+        this.par[this.npar]   = 10*par + (ch & 0xF);
+        break;
+      } else if (this.isEsc == 5 /* ESdeviceattr */) {
+        switch (ch) {
+/*c*/   case 0x63: if (this.par[0] == 0) this.respondSecondaryDA();     break;
+/*m*/   case 0x6D: /* (re)set key modifier resource values */           break;
+/*n*/   case 0x6E: /* disable key modifier resource values */           break;
+/*p*/   case 0x70: /* set pointer mode resource value */                break;
+        default:                                                        break;
+        }
+        this.isEsc            = 0 /* ESnormal */;
+        break;
+      } else {
+        this.isEsc            = 4 /* ESgotpars */;
+      }
+      // Fall through
+    case 4 /* ESgotpars */:
+      this.isEsc              = 0 /* ESnormal */;
+      if (this.isQuestionMark) {
+        switch (ch) {
+/*h*/   case 0x68: this.setMode(true);                                  break;
+/*l*/   case 0x6C: this.setMode(false);                                 break;
+/*c*/   case 0x63: this.setCursorAttr(this.par[2], this.par[1]);        break;
+        default:                                                        break;
+        }
+        this.isQuestionMark   = false;
+        break;
+      }
+      switch (ch) {
+/*!*/ case 0x21: this.isEsc   = 12 /* ESbang */;                                 break;
+/*>*/ case 0x3E: if (!this.npar) this.isEsc  = 5 /* ESdeviceattr */;            break;
+/*G*/ case 0x47:
+/*`*/ case 0x60: this.gotoXY(this.par[0] - 1, this.cursorY);            break;
+/*A*/ case 0x41: this.gotoXY(this.cursorX,
+                             this.cursorY - (this.par[0] ? this.par[0] : 1));
+                                                                        break;
+/*B*/ case 0x42:
+/*e*/ case 0x65: this.gotoXY(this.cursorX,
+                             this.cursorY + (this.par[0] ? this.par[0] : 1));
+                                                                        break;
+/*C*/ case 0x43:
+/*a*/ case 0x61: this.gotoXY(this.cursorX + (this.par[0] ? this.par[0] : 1),
+                             this.cursorY);                             break;
+/*D*/ case 0x44: this.gotoXY(this.cursorX - (this.par[0] ? this.par[0] : 1),
+                             this.cursorY);                             break;
+/*E*/ case 0x45: this.gotoXY(0, this.cursorY + (this.par[0] ? this.par[0] :1));
+                                                                        break;
+/*F*/ case 0x46: this.gotoXY(0, this.cursorY - (this.par[0] ? this.par[0] :1));
+                                                                        break;
+/*d*/ case 0x64: this.gotoXaY(this.cursorX, this.par[0] - 1);           break;
+/*H*/ case 0x48:
+/*f*/ case 0x66: this.gotoXaY(this.par[1] - 1, this.par[0] - 1);        break;
+/*I*/ case 0x49: this.ht(this.par[0] ? this.par[0] : 1);                break;
+/*@*/ case 0x40: this.csiAt(this.par[0]);                               break;
+/*i*/ case 0x69: this.csii(this.par[0]);                                break;
+/*J*/ case 0x4A: this.csiJ(this.par[0]);                                break;
+/*K*/ case 0x4B: this.csiK(this.par[0]);                                break;
+/*L*/ case 0x4C: this.csiL(this.par[0]);                                break;
+/*M*/ case 0x4D: this.csiM(this.par[0]);                                break;
+/*m*/ case 0x6D: this.csim();                                           break;
+/*P*/ case 0x50: this.csiP(this.par[0]);                                break;
+/*X*/ case 0x58: this.csiX(this.par[0]);                                break;
+/*S*/ case 0x53: this.lf(this.par[0] ? this.par[0] : 1);                break;
+/*T*/ case 0x54: this.ri(this.par[0] ? this.par[0] : 1);                break;
+/*c*/ case 0x63: if (!this.par[0]) this.respondID();                    break;
+/*g*/ case 0x67: if (this.par[0] == 0) {
+                   this.userTabStop[this.cursorX] = false;
+                 } else if (this.par[0] == 2 || this.par[0] == 3) {
+                   this.userTabStop               = [ ];
+                   for (var i = 0; i < this.terminalWidth; i++) {
+                     this.userTabStop[i]          = false;
+                   }
+                 }
+                 break;
+/*h*/ case 0x68: this.setMode(true);                                    break;
+/*l*/ case 0x6C: this.setMode(false);                                   break;
+/*n*/ case 0x6E: switch (this.par[0]) {
+                 case 5: this.statusReport();                           break;
+                 case 6: this.cursorReport();                           break;
+                 default:                                               break;
+                 }
+                 break;
+/*q*/ case 0x71: // LED control not implemented
+                                                                        break;
+/*r*/ case 0x72: var t        = this.par[0] ? this.par[0] : 1;
+                 var b        = this.par[1] ? this.par[1]
+                                            : this.terminalHeight;
+                 if (t < b && b <= this.terminalHeight) {
+                   this.top   = t - 1;
+                   this.bottom= b;
+                   this.gotoXaY(0, 0);
+                 }
+                 break;
+/*b*/ case 0x62: var c        = this.par[0] ? this.par[0] : 1;
+                 if (c > this.terminalWidth * this.terminalHeight) {
+                   c          = this.terminalWidth * this.terminalHeight;
+                 }
+                 while (c-- > 0) {
+                   lineBuf   += this.lastCharacter;
+                 }
+                 break;
+/*s*/ case 0x73: this.saveCursor();                                     break;
+/*u*/ case 0x75: this.restoreCursor();                                  break;
+/*Z*/ case 0x5A: this.rt(this.par[0] ? this.par[0] : 1);                break;
+/*]*/ case 0x5D: this.settermCommand();                                 break;
+      default:                                                          break;
+      }
+      break;
+    case 12 /* ESbang */:
+      if (ch == 'p') {
+        this.reset();
+      }
+      this.isEsc              = 0 /* ESnormal */;
+      break;
+    case 13 /* ESpercent */:
+      this.isEsc              = 0 /* ESnormal */;
+      switch (ch) {
+/*@*/ case 0x40: this.utfEnabled = false;                               break;
+/*G*/ case 0x47:
+/*8*/ case 0x38: this.utfEnabled = true;                                break;
+      default:                                                          break;
+      }
+      break;
+    case 6 /* ESfunckey */:
+      this.isEsc              = 0 /* ESnormal */;                               break;
+    case 7 /* EShash */:
+      this.isEsc              = 0 /* ESnormal */;
+/*8*/ if (ch == 0x38) {
+        // Screen alignment test not implemented
+      }
+      break;
+    case 8 /* ESsetG0 */:
+    case 9 /* ESsetG1 */:
+    case 10 /* ESsetG2 */:
+    case 11 /* ESsetG3 */:
+      var g                   = this.isEsc - 8 /* ESsetG0 */;
+      this.isEsc              = 0 /* ESnormal */;
+      switch (ch) {
+/*0*/ case 0x30: this.GMap[g] = this.VT100GraphicsMap;                  break;
+/*A*/ case 0x41:
+/*B*/ case 0x42: this.GMap[g] = this.Latin1Map;                         break;
+/*U*/ case 0x55: this.GMap[g] = this.CodePage437Map;                    break;
+/*K*/ case 0x4B: this.GMap[g] = this.DirectToFontMap;                   break;
+      default:                                                          break;
+      }
+      if (this.useGMap == g) {
+        this.translate        = this.GMap[g];
+      }
+      break;
+    case 17 /* EStitle */:
+      if (ch == 0x07) {
+        if (this.titleString && this.titleString.charAt(0) == ';') {
+          this.titleString    = this.titleString.substr(1);
+          if (this.titleString != '') {
+            this.titleString += ' - ';
+          }
+          this.titleString += 'Shell In A Box';
+        }
+        try {
+          window.document.title = this.titleString;
+        } catch (e) {
+        }
+        this.isEsc            = 0 /* ESnormal */;
+      } else {
+        this.titleString     += String.fromCharCode(ch);
+      }
+      break;
+    case 18 /* ESss2 */:
+    case 19 /* ESss3 */:
+      if (ch < 256) {
+        ch                    = this.GMap[this.isEsc - 18 /* ESss2 */ + 2]
+                                         [this.toggleMeta ? (ch | 0x80) : ch];
+        if ((ch & 0xFF00) == 0xF000) {
+          ch                  = ch & 0xFF;
+        } else if (ch == 0xFEFF || (ch >= 0x200A && ch <= 0x200F)) {
+          this.isEsc         = 0 /* ESnormal */;                                break;
+        }
+      }
+      this.lastCharacter      = String.fromCharCode(ch);
+      lineBuf                += this.lastCharacter;
+      this.isEsc              = 0 /* ESnormal */;                               break;
+    default:
+      this.isEsc              = 0 /* ESnormal */;                               break;
+    }
+    break;
+  }
+  return lineBuf;
+};
+
+VT100.prototype.renderString = function(s, showCursor) {
+  if (this.printing) {
+    this.sendToPrinter(s);
+    if (showCursor) {
+      this.showCursor();
+    }
+    return;
+  }
+
+  // We try to minimize the number of DOM operations by coalescing individual
+  // characters into strings. This is a significant performance improvement.
+  var incX = s.length;
+  if (incX > this.terminalWidth - this.cursorX) {
+    incX   = this.terminalWidth - this.cursorX;
+    if (incX <= 0) {
+      return;
+    }
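+    // Clip to the right margin; the string's final character is kept so the
+    // cell at the margin shows the most recently written character.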
+    s      = s.substr(0, incX - 1) + s.charAt(s.length - 1);
+  }
+  if (showCursor) {
+    // Minimize the number of calls to putString(), by avoiding a direct
+    // call to this.showCursor()
+    this.cursor.style.visibility = '';
+  }
+  this.putString(this.cursorX, this.cursorY, s, this.color, this.style);
+};
+
+VT100.prototype.vt100 = function(s) {
+  this.cursorNeedsShowing = this.hideCursor();
+  this.respondString      = '';
+  var lineBuf             = '';
+  for (var i = 0; i < s.length; i++) {
+    var ch = s.charCodeAt(i);
+    if (this.utfEnabled) {
+      // Decode UTF8 encoded character
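+      // Multi-byte sequences are accumulated in this.utfChar, with
+      // this.utfCount continuation bytes still expected; code points
+      // outside the BMP decode to U+FFFD (the replacement character).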
+      if (ch > 0x7F) {
+        if (this.utfCount > 0 && (ch & 0xC0) == 0x80) {
+          this.utfChar    = (this.utfChar << 6) | (ch & 0x3F);
+          if (--this.utfCount <= 0) {
+            if (this.utfChar > 0xFFFF || this.utfChar < 0) {
+              ch = 0xFFFD;
+            } else {
+              ch          = this.utfChar;
+            }
+          } else {
+            continue;
+          }
+        } else {
+          if ((ch & 0xE0) == 0xC0) {
+            this.utfCount = 1;
+            this.utfChar  = ch & 0x1F;
+          } else if ((ch & 0xF0) == 0xE0) {
+            this.utfCount = 2;
+            this.utfChar  = ch & 0x0F;
+          } else if ((ch & 0xF8) == 0xF0) {
+            this.utfCount = 3;
+            this.utfChar  = ch & 0x07;
+          } else if ((ch & 0xFC) == 0xF8) {
+            this.utfCount = 4;
+            this.utfChar  = ch & 0x03;
+          } else if ((ch & 0xFE) == 0xFC) {
+            this.utfCount = 5;
+            this.utfChar  = ch & 0x01;
+          } else {
+            this.utfCount = 0;
+          }
+          continue;
+        }
+      } else {
+        this.utfCount     = 0;
+      }
+    }
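+    // A character is treated as printable if it falls in the printable
+    // ASCII or Latin-1 ranges, is a decoded UTF-8 character, or is a
+    // control code that the active lookup table (ctrlAlways when dispCtrl
+    // is set, ctrlAction otherwise) does not claim as a control action.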
+    var isNormalCharacter =
+      (ch >= 32 && ch <= 127 || ch >= 160 ||
+       this.utfEnabled && ch >= 128 ||
+       !(this.dispCtrl ? this.ctrlAlways : this.ctrlAction)[ch & 0x1F]) &&
+      (ch != 0x7F || this.dispCtrl);
+    
+    if (isNormalCharacter && this.isEsc == 0 /* ESnormal */) {
+      if (ch < 256) {
+        ch                = this.translate[this.toggleMeta ? (ch | 0x80) : ch];
+      }
+      if ((ch & 0xFF00) == 0xF000) {
+        ch                = ch & 0xFF;
+      } else if (ch == 0xFEFF || (ch >= 0x200A && ch <= 0x200F)) {
+        continue;
+      }
+      if (!this.printing) {
+        if (this.needWrap || this.insertMode) {
+          if (lineBuf) {
+            this.renderString(lineBuf);
+            lineBuf       = '';
+          }
+        }
+        if (this.needWrap) {
+          this.cr(); this.lf();
+        }
+        if (this.insertMode) {
+          this.scrollRegion(this.cursorX, this.cursorY,
+                            this.terminalWidth - this.cursorX - 1, 1,
+                            1, 0, this.color, this.style);
+        }
+      }
+      this.lastCharacter  = String.fromCharCode(ch);
+      lineBuf            += this.lastCharacter;
+      if (!this.printing &&
+          this.cursorX + lineBuf.length >= this.terminalWidth) {
+        this.needWrap     = this.autoWrapMode;
+      }
+    } else {
+      if (lineBuf) {
+        this.renderString(lineBuf);
+        lineBuf           = '';
+      }
+      var expand          = this.doControl(ch);
+      if (expand.length) {
+        var r             = this.respondString;
+        this.respondString= r + this.vt100(expand);
+      }
+    }
+  }
+  if (lineBuf) {
+    this.renderString(lineBuf, this.cursorNeedsShowing);
+  } else if (this.cursorNeedsShowing) {
+    this.showCursor();
+  }
+  return this.respondString;
+};
+
+VT100.prototype.Latin1Map = [
+0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
+0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
+0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
+0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
+0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
+0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
+0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
+0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
+0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
+0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
+0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
+0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
+0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
+0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
+0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
+0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x007F,
+0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
+0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
+0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
+0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
+0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
+0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
+0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
+0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
+0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
+0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
+0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
+0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
+0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
+0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
+0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
+0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
+];
+
+VT100.prototype.VT100GraphicsMap = [
+0x0000, 0x0001, 0x0002, 0x0003, 0x0004, 0x0005, 0x0006, 0x0007,
+0x0008, 0x0009, 0x000A, 0x000B, 0x000C, 0x000D, 0x000E, 0x000F,
+0x0010, 0x0011, 0x0012, 0x0013, 0x0014, 0x0015, 0x0016, 0x0017,
+0x0018, 0x0019, 0x001A, 0x001B, 0x001C, 0x001D, 0x001E, 0x001F,
+0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
+0x0028, 0x0029, 0x002A, 0x2192, 0x2190, 0x2191, 0x2193, 0x002F,
+0x2588, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
+0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
+0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
+0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
+0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
+0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x00A0,
+0x25C6, 0x2592, 0x2409, 0x240C, 0x240D, 0x240A, 0x00B0, 0x00B1,
+0x2591, 0x240B, 0x2518, 0x2510, 0x250C, 0x2514, 0x253C, 0xF800,
+0xF801, 0x2500, 0xF803, 0xF804, 0x251C, 0x2524, 0x2534, 0x252C,
+0x2502, 0x2264, 0x2265, 0x03C0, 0x2260, 0x00A3, 0x00B7, 0x007F,
+0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
+0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
+0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
+0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
+0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
+0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
+0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
+0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
+0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
+0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
+0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
+0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
+0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
+0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
+0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
+0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF
+];
+
+VT100.prototype.CodePage437Map = [
+0x0000, 0x263A, 0x263B, 0x2665, 0x2666, 0x2663, 0x2660, 0x2022,
+0x25D8, 0x25CB, 0x25D9, 0x2642, 0x2640, 0x266A, 0x266B, 0x263C,
+0x25B6, 0x25C0, 0x2195, 0x203C, 0x00B6, 0x00A7, 0x25AC, 0x21A8,
+0x2191, 0x2193, 0x2192, 0x2190, 0x221F, 0x2194, 0x25B2, 0x25BC,
+0x0020, 0x0021, 0x0022, 0x0023, 0x0024, 0x0025, 0x0026, 0x0027,
+0x0028, 0x0029, 0x002A, 0x002B, 0x002C, 0x002D, 0x002E, 0x002F,
+0x0030, 0x0031, 0x0032, 0x0033, 0x0034, 0x0035, 0x0036, 0x0037,
+0x0038, 0x0039, 0x003A, 0x003B, 0x003C, 0x003D, 0x003E, 0x003F,
+0x0040, 0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047,
+0x0048, 0x0049, 0x004A, 0x004B, 0x004C, 0x004D, 0x004E, 0x004F,
+0x0050, 0x0051, 0x0052, 0x0053, 0x0054, 0x0055, 0x0056, 0x0057,
+0x0058, 0x0059, 0x005A, 0x005B, 0x005C, 0x005D, 0x005E, 0x005F,
+0x0060, 0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067,
+0x0068, 0x0069, 0x006A, 0x006B, 0x006C, 0x006D, 0x006E, 0x006F,
+0x0070, 0x0071, 0x0072, 0x0073, 0x0074, 0x0075, 0x0076, 0x0077,
+0x0078, 0x0079, 0x007A, 0x007B, 0x007C, 0x007D, 0x007E, 0x2302,
+0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
+0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
+0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
+0x00FF, 0x00D6, 0x00DC, 0x00A2, 0x00A3, 0x00A5, 0x20A7, 0x0192,
+0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
+0x00BF, 0x2310, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
+0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556,
+0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510,
+0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F,
+0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567,
+0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B,
+0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580,
+0x03B1, 0x00DF, 0x0393, 0x03C0, 0x03A3, 0x03C3, 0x00B5, 0x03C4,
+0x03A6, 0x0398, 0x03A9, 0x03B4, 0x221E, 0x03C6, 0x03B5, 0x2229,
+0x2261, 0x00B1, 0x2265, 0x2264, 0x2320, 0x2321, 0x00F7, 0x2248,
+0x00B0, 0x2219, 0x00B7, 0x221A, 0x207F, 0x00B2, 0x25A0, 0x00A0
+];
+
+VT100.prototype.DirectToFontMap = [
+0xF000, 0xF001, 0xF002, 0xF003, 0xF004, 0xF005, 0xF006, 0xF007,
+0xF008, 0xF009, 0xF00A, 0xF00B, 0xF00C, 0xF00D, 0xF00E, 0xF00F,
+0xF010, 0xF011, 0xF012, 0xF013, 0xF014, 0xF015, 0xF016, 0xF017,
+0xF018, 0xF019, 0xF01A, 0xF01B, 0xF01C, 0xF01D, 0xF01E, 0xF01F,
+0xF020, 0xF021, 0xF022, 0xF023, 0xF024, 0xF025, 0xF026, 0xF027,
+0xF028, 0xF029, 0xF02A, 0xF02B, 0xF02C, 0xF02D, 0xF02E, 0xF02F,
+0xF030, 0xF031, 0xF032, 0xF033, 0xF034, 0xF035, 0xF036, 0xF037,
+0xF038, 0xF039, 0xF03A, 0xF03B, 0xF03C, 0xF03D, 0xF03E, 0xF03F,
+0xF040, 0xF041, 0xF042, 0xF043, 0xF044, 0xF045, 0xF046, 0xF047,
+0xF048, 0xF049, 0xF04A, 0xF04B, 0xF04C, 0xF04D, 0xF04E, 0xF04F,
+0xF050, 0xF051, 0xF052, 0xF053, 0xF054, 0xF055, 0xF056, 0xF057,
+0xF058, 0xF059, 0xF05A, 0xF05B, 0xF05C, 0xF05D, 0xF05E, 0xF05F,
+0xF060, 0xF061, 0xF062, 0xF063, 0xF064, 0xF065, 0xF066, 0xF067,
+0xF068, 0xF069, 0xF06A, 0xF06B, 0xF06C, 0xF06D, 0xF06E, 0xF06F,
+0xF070, 0xF071, 0xF072, 0xF073, 0xF074, 0xF075, 0xF076, 0xF077,
+0xF078, 0xF079, 0xF07A, 0xF07B, 0xF07C, 0xF07D, 0xF07E, 0xF07F,
+0xF080, 0xF081, 0xF082, 0xF083, 0xF084, 0xF085, 0xF086, 0xF087,
+0xF088, 0xF089, 0xF08A, 0xF08B, 0xF08C, 0xF08D, 0xF08E, 0xF08F,
+0xF090, 0xF091, 0xF092, 0xF093, 0xF094, 0xF095, 0xF096, 0xF097,
+0xF098, 0xF099, 0xF09A, 0xF09B, 0xF09C, 0xF09D, 0xF09E, 0xF09F,
+0xF0A0, 0xF0A1, 0xF0A2, 0xF0A3, 0xF0A4, 0xF0A5, 0xF0A6, 0xF0A7,
+0xF0A8, 0xF0A9, 0xF0AA, 0xF0AB, 0xF0AC, 0xF0AD, 0xF0AE, 0xF0AF,
+0xF0B0, 0xF0B1, 0xF0B2, 0xF0B3, 0xF0B4, 0xF0B5, 0xF0B6, 0xF0B7,
+0xF0B8, 0xF0B9, 0xF0BA, 0xF0BB, 0xF0BC, 0xF0BD, 0xF0BE, 0xF0BF,
+0xF0C0, 0xF0C1, 0xF0C2, 0xF0C3, 0xF0C4, 0xF0C5, 0xF0C6, 0xF0C7,
+0xF0C8, 0xF0C9, 0xF0CA, 0xF0CB, 0xF0CC, 0xF0CD, 0xF0CE, 0xF0CF,
+0xF0D0, 0xF0D1, 0xF0D2, 0xF0D3, 0xF0D4, 0xF0D5, 0xF0D6, 0xF0D7,
+0xF0D8, 0xF0D9, 0xF0DA, 0xF0DB, 0xF0DC, 0xF0DD, 0xF0DE, 0xF0DF,
+0xF0E0, 0xF0E1, 0xF0E2, 0xF0E3, 0xF0E4, 0xF0E5, 0xF0E6, 0xF0E7,
+0xF0E8, 0xF0E9, 0xF0EA, 0xF0EB, 0xF0EC, 0xF0ED, 0xF0EE, 0xF0EF,
+0xF0F0, 0xF0F1, 0xF0F2, 0xF0F3, 0xF0F4, 0xF0F5, 0xF0F6, 0xF0F7,
+0xF0F8, 0xF0F9, 0xF0FA, 0xF0FB, 0xF0FC, 0xF0FD, 0xF0FE, 0xF0FF
+];
+
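+// Lookup tables indexed by control code (0x00-0x1F): true means doControl()
+// acts on the character. ctrlAction applies in normal mode, ctrlAlways when
+// display of control characters (dispCtrl) is enabled.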
+VT100.prototype.ctrlAction = [
+  true,  false, false, false, false, false, false, true,
+  true,  true,  true,  true,  true,  true,  true,  true,
+  false, false, false, false, false, false, false, false,
+  true,  false, true,  true,  false, false, false, false
+];
+
+VT100.prototype.ctrlAlways = [
+  true,  false, false, false, false, false, false, false,
+  true,  false, true,  false, true,  true,  true,  true,
+  false, false, false, false, false, false, false, false,
+  false, false, false, true,  false, false, false, false
+];
+
+
diff --git a/apps/workbench/public/webshell/styles.css b/apps/workbench/public/webshell/styles.css
new file mode 100644 (file)
index 0000000..3097cb4
--- /dev/null
@@ -0,0 +1,272 @@
+#vt100 a { 
+  text-decoration:      none;
+  color:                inherit;
+}
+
+#vt100 a:hover { 
+  text-decoration:      underline;
+}
+
+#vt100 #reconnect {
+  position:             absolute;
+  z-index:              2;
+}
+
+#vt100 #reconnect input { 
+  padding:              1ex;
+  font-weight:          bold;
+  font-size:            x-large;
+}
+
+#vt100 #cursize {
+  background:           #EEEEEE;
+  border:               1px solid black;
+  font-family:          sans-serif;
+  font-size:            large;
+  font-weight:          bold;
+  padding:              1ex;
+  position:             absolute;
+  z-index:              2;
+}
+
+#vt100 pre { 
+  margin:               0px;
+}
+
+#vt100 pre pre {
+  overflow:             hidden;
+}
+
+#vt100 #scrollable {
+  overflow-x:           hidden;
+  overflow-y:           scroll;
+  position:             relative;
+  padding:              1px;
+}
+
+#vt100 #console, #vt100 #alt_console, #vt100 #cursor, #vt100 #lineheight, #vt100 .hidden pre { 
+  font-family:          "DejaVu Sans Mono", "Everson Mono", FreeMono, "Andale Mono", monospace;
+}
+
+#vt100 #lineheight { 
+  position:             absolute;
+  visibility:           hidden;
+}
+
+#vt100 #cursor {
+  position:             absolute;
+  left:                 0px;
+  top:                  0px;
+  overflow:             hidden;
+  z-index:              1;
+}
+
+#vt100 #cursor.bright {
+  background-color:     black;
+  color:                white;
+}
+
+#vt100 #cursor.dim {
+  visibility:           hidden;
+}
+
+#vt100 #cursor.inactive {
+  border:               1px solid;
+  margin:               -1px;
+}
+
+#vt100 #padding { 
+  visibility:           hidden;
+  width:                1px;
+  height:               0px;
+  overflow:             hidden;
+}
+
+#vt100 .hidden {
+  position:             absolute;
+  top:                  -10000px;
+  left:                 -10000px;
+  width:                0px;
+  height:               0px;
+}
+
+#vt100 #menu { 
+  overflow:             visible;
+  position:             absolute;
+  z-index:              3;
+}
+
+#vt100 #menu .popup {
+  background-color:     #EEEEEE;
+  border:               1px solid black;
+  font-family:          sans-serif;
+  position:             absolute;
+}
+
+#vt100 #menu .popup ul { 
+  list-style-type:      none;
+  padding:              0px;
+  margin:               0px;
+  min-width:            10em;
+}
+
+#vt100 #menu .popup li { 
+  padding:              3px 0.5ex 3px 0.5ex;
+}
+
+#vt100 #menu .popup li.hover {
+  background-color:     #444444;
+  color:                white;
+}
+
+#vt100 #menu .popup li.disabled {
+  color:                #AAAAAA;
+}
+
+#vt100 #menu .popup hr { 
+  margin:               0.5ex 0px 0.5ex 0px;
+}
+
+#vt100 #menu img { 
+  margin-right:         0.5ex;
+  width:                1ex;
+  height:               1ex;
+}
+
+#vt100 #scrollable.inverted { color:            #ffffff;
+                              background-color: #000000; }
+
+#vt100 #kbd_button { 
+  float:                left;
+  position:             fixed;
+  z-index:              0;
+  visibility:           hidden;
+}
+
+#vt100 #keyboard {
+  z-index:              3;
+  position:             absolute;
+}
+
+#vt100 #keyboard .box {
+  font-family:          sans-serif;
+  background-color:     #cccccc;
+  padding:              .8em;
+  float:                left;
+  position:             absolute;
+  border-radius:        10px;
+  -moz-border-radius:   10px;
+  box-shadow:           4px 4px 6px #222222;
+  -webkit-box-shadow:   4px 4px 6px #222222;
+  /* Don't set the -moz-box-shadow. It doesn't properly scale when CSS
+   * transforms are in effect. Once Firefox supports box-shadow, it should
+   * automatically do the right thing. Until then, leave shadows disabled
+   * for Firefox.
+   */
+  opacity:              0.85;
+  -moz-opacity:         0.85;
+  filter:               alpha(opacity=85);
+}
+
+#vt100 #keyboard .box * {
+  vertical-align:       top;
+  display:              inline-block;
+}
+
+#vt100 #keyboard b, #vt100 #keyboard i, #vt100 #keyboard s, #vt100 #keyboard u {
+  font-style:           normal;
+  font-weight:          bold;
+  border-radius:        5px;
+  -moz-border-radius:   5px;
+  background-color:     #555555;
+  color:                #eeeeee;
+  box-shadow:           2px 2px 3px #222222;
+  -webkit-box-shadow:   2px 2px 3px #222222;
+  padding:              4px;
+  margin:               2px;
+  height:               2ex;
+  display:              inline-block;
+  text-align:           center;
+  text-decoration:      none;
+}
+
+#vt100 #keyboard b, #vt100 #keyboard s {
+  width:                2ex;
+}
+
+#vt100 #keyboard u, #vt100 #keyboard s {
+  visibility:           hidden;
+}
+
+#vt100 #keyboard .shifted { 
+  display:              none;
+}
+
+#vt100 #keyboard .selected {
+  color:                #888888;
+  background-color:     #eeeeee;
+  box-shadow:           0px 0px 3px #222222;
+  -webkit-box-shadow:   0px 0px 3px #222222;
+  position:             relative;
+  top:                  1px;
+  left:                 1px;
+}
+
+[if DEFINES_COLORS]
+/* IE cannot properly handle "inherit" properties. So, the monochrome.css/
+ * color.css style sheets cannot work, if we define colors in styles.css.
+ */
+[else DEFINES_COLORS]
+#vt100 .ansi0               {                            }
+#vt100 .ansi1               { color:            #cd0000; }
+#vt100 .ansi2               { color:            #00cd00; }
+#vt100 .ansi3               { color:            #cdcd00; }
+#vt100 .ansi4               { color:            #0000ee; }
+#vt100 .ansi5               { color:            #cd00cd; }
+#vt100 .ansi6               { color:            #00cdcd; }
+#vt100 .ansi7               { color:            #e5e5e5; }
+#vt100 .ansi8               { color:            #7f7f7f; }
+#vt100 .ansi9               { color:            #ff0000; }
+#vt100 .ansi10              { color:            #00ff00; }
+#vt100 .ansi11              { color:            #e8e800; }
+#vt100 .ansi12              { color:            #5c5cff; }
+#vt100 .ansi13              { color:            #ff00ff; }
+#vt100 .ansi14              { color:            #00ffff; }
+#vt100 .ansi15              { color:            #ffffff; }
+
+#vt100 .bgAnsi0             { background-color: #000000; }
+#vt100 .bgAnsi1             { background-color: #cd0000; }
+#vt100 .bgAnsi2             { background-color: #00cd00; }
+#vt100 .bgAnsi3             { background-color: #cdcd00; }
+#vt100 .bgAnsi4             { background-color: #0000ee; }
+#vt100 .bgAnsi5             { background-color: #cd00cd; }
+#vt100 .bgAnsi6             { background-color: #00cdcd; }
+#vt100 .bgAnsi7             { background-color: #e5e5e5; }
+#vt100 .bgAnsi8             { background-color: #7f7f7f; }
+#vt100 .bgAnsi9             { background-color: #ff0000; }
+#vt100 .bgAnsi10            { background-color: #00ff00; }
+#vt100 .bgAnsi11            { background-color: #e8e800; }
+#vt100 .bgAnsi12            { background-color: #5c5cff; }
+#vt100 .bgAnsi13            { background-color: #ff00ff; }
+#vt100 .bgAnsi14            { background-color: #00ffff; }
+#vt100 .bgAnsi15            {                            }
+[endif DEFINES_COLORS]
+
+@media print {
+  #vt100 .scrollback {
+    display:            none;
+  }
+
+  #vt100 #reconnect, #vt100 #cursor, #vt100 #menu, #vt100 #kbd_button, #vt100 #keyboard { 
+    visibility:         hidden;
+  }
+
+  #vt100 #scrollable { 
+    overflow:           hidden;
+  }
+
+  #vt100 #console, #vt100 #alt_console { 
+    overflow:           hidden;
+    width:              1000000ex;
+  }
+}
index 8745d96792985d74f7116eaeb660bbb5c599e143..9f561aa577037210f72ea06352254f71ae732d18 100644 (file)
@@ -27,19 +27,18 @@ class ActionsControllerTest < ActionController::TestCase
 
     assert_response 302   # collection created and redirected to new collection page
 
-    assert response.headers['Location'].include? '/collections/'
+    assert_includes(response.headers['Location'], '/collections/')
     new_collection_uuid = response.headers['Location'].split('/')[-1]
 
     use_token :active
     collection = Collection.select([:uuid, :manifest_text]).where(uuid: new_collection_uuid).first
     manifest_text = collection['manifest_text']
-    assert manifest_text.include?('foo'), 'Not found foo in new collection manifest text'
-    assert manifest_text.include?('bar'), 'Not found bar in new collection manifest text'
-    assert manifest_text.include?('baz'), 'Not found baz in new collection manifest text'
-    assert manifest_text.include?('0:0:file1 0:0:file2 0:0:file3'),
-                'Not found 0:0:file1 0:0:file2 0:0:file3 in new collection manifest text'
-    assert manifest_text.include?('dir1/subdir'), 'Not found dir1/subdir in new collection manifest text'
-    assert manifest_text.include?('dir2'), 'Not found dir2 in new collection manifest text'
+    assert_includes(manifest_text, "foo")
+    assert_includes(manifest_text, "bar")
+    assert_includes(manifest_text, "baz")
+    assert_includes(manifest_text, "0:0:file1 0:0:file2 0:0:file3")
+    assert_includes(manifest_text, "dir1/subdir")
+    assert_includes(manifest_text, "dir2")
   end
 
   test "combine files  with repeated names into new collection" do
@@ -55,21 +54,19 @@ class ActionsControllerTest < ActionController::TestCase
 
     assert_response 302   # collection created and redirected to new collection page
 
-    assert response.headers['Location'].include? '/collections/'
+    assert_includes(response.headers['Location'], '/collections/')
     new_collection_uuid = response.headers['Location'].split('/')[-1]
 
     use_token :active
     collection = Collection.select([:uuid, :manifest_text]).where(uuid: new_collection_uuid).first
     manifest_text = collection['manifest_text']
-    assert manifest_text.include?('foo'), 'Not found foo in new collection manifest text'
-    assert manifest_text.include?('foo(1)'), 'Not found foo(1) in new collection manifest text'
-    assert manifest_text.include?('foo(2)'), 'Not found foo(2) in new collection manifest text'
-    assert manifest_text.include?('bar'), 'Not found bar in new collection manifest text'
-    assert manifest_text.include?('baz'), 'Not found baz in new collection manifest text'
-    assert manifest_text.include?('0:0:file1 0:0:file2 0:0:file3'),
-                'Not found 0:0:file1 0:0:file2 0:0:file3 in new collection manifest text'
-    assert manifest_text.include?('dir1/subdir'), 'Not found dir1/subdir in new collection manifest text'
-    assert manifest_text.include?('dir2'), 'Not found dir2 in new collection manifest text'
+    assert_includes(manifest_text, "foo(1)")
+    assert_includes(manifest_text, "foo(2)")
+    assert_includes(manifest_text, "bar")
+    assert_includes(manifest_text, "baz")
+    assert_includes(manifest_text, "0:0:file1 0:0:file2 0:0:file3")
+    assert_includes(manifest_text, "dir1/subdir")
+    assert_includes(manifest_text, "dir2")
   end
 
   test "combine collections with repeated filenames in almost similar directories and expect files with proper suffixes" do
@@ -90,26 +87,26 @@ class ActionsControllerTest < ActionController::TestCase
     collection = Collection.select([:uuid, :manifest_text]).where(uuid: new_collection_uuid).first
     manifest_text = collection['manifest_text']
 
-    assert manifest_text.include?('foo'), 'Not found foo in new collection manifest text'
-    assert manifest_text.include?('foo(1)'), 'Not found foo(1) in new collection manifest text'
+    assert_includes(manifest_text, 'foo')
+    assert_includes(manifest_text, 'foo(1)')
 
     streams = manifest_text.split "\n"
     streams.each do |stream|
       if stream.start_with? './dir1'
         # dir1 stream
-        assert stream.include?(':alice(1)'), "Not found: alice(1) in dir1 in manifest text #{manifest_text}"
-        assert stream.include?(':alice.txt'), "Not found: alice.txt in dir1 in manifest text #{manifest_text}"
-        assert stream.include?(':alice(1).txt'), "Not found: alice(1).txt in dir1 in manifest text #{manifest_text}"
-        assert stream.include?(':bob.txt'), "Not found: bob.txt in dir1 in manifest text #{manifest_text}"
-        assert stream.include?(':carol.txt'), "Not found: carol.txt in dir1 in manifest text #{manifest_text}"
+        assert_includes(stream, ':alice(1)')
+        assert_includes(stream, ':alice.txt')
+        assert_includes(stream, ':alice(1).txt')
+        assert_includes(stream, ':bob.txt')
+        assert_includes(stream, ':carol.txt')
       elsif stream.start_with? './dir2'
         # dir2 stream
-        assert stream.include?(':alice.txt'), "Not found: alice.txt in dir2 in manifest text #{manifest_text}"
-        assert stream.include?(':alice(1).txt'), "Not found: alice(1).txt in dir2 in manifest text #{manifest_text}"
+        assert_includes(stream, ':alice.txt')
+        assert_includes(stream, ':alice(1).txt')
       elsif stream.start_with? '. '
         # . stream
-        assert stream.include?(':foo'), "Not found: foo in . in manifest text #{manifest_text}"
-        assert stream.include?(':foo(1)'), "Not found: foo(1) in . in manifest text #{manifest_text}"
+        assert_includes(stream, ':foo')
+        assert_includes(stream, ':foo(1)')
       end
     end
   end
@@ -123,7 +120,7 @@ class ActionsControllerTest < ActionController::TestCase
 
     assert_response 302   # collection created and redirected to new collection page
 
-    assert response.headers['Location'].include? '/collections/'
+    assert_includes(response.headers['Location'], '/collections/')
     new_collection_uuid = response.headers['Location'].split('/')[-1]
 
     use_token :active
@@ -134,12 +131,12 @@ class ActionsControllerTest < ActionController::TestCase
     assert_equal 2, streams.length
     streams.each do |stream|
       if stream.start_with? './dir1'
-        assert stream.include?('foo'), 'Not found: foo in dir1'
+        assert_includes(stream, 'foo')
       elsif stream.start_with? '. '
-        assert stream.include?('foo'), 'Not found: foo in .'
+        assert_includes(stream, 'foo')
       end
     end
-    assert !manifest_text.include?('foo(1)'), 'Found foo(1) in new collection manifest text'
+    refute_includes(manifest_text, 'foo(1)')
   end
 
   test "combine foo files from two different collection streams and expect proper filename suffixes" do
@@ -151,7 +148,7 @@ class ActionsControllerTest < ActionController::TestCase
 
     assert_response 302   # collection created and redirected to new collection page
 
-    assert response.headers['Location'].include? '/collections/'
+    assert_includes(response.headers['Location'], '/collections/')
     new_collection_uuid = response.headers['Location'].split('/')[-1]
 
     use_token :active
@@ -160,7 +157,46 @@ class ActionsControllerTest < ActionController::TestCase
 
     streams = manifest_text.split "\n"
     assert_equal 1, streams.length, "Incorrect number of streams in #{manifest_text}"
-    assert manifest_text.include?('foo'), "Not found foo in new collection manifest text #{manifest_text}"
-    assert manifest_text.include?('foo(1)'), "Not found foo(1) in new collection manifest text #{manifest_text}"
+    assert_includes(manifest_text, 'foo')
+    assert_includes(manifest_text, 'foo(1)')
+  end
+
+  [
+    ['collections', 'user_agreement_in_anonymously_accessible_project'],
+    ['groups', 'anonymously_accessible_project'],
+    ['jobs', 'running_job_in_publicly_accessible_project'],
+    ['pipeline_instances', 'pipeline_in_publicly_accessible_project'],
+    ['pipeline_templates', 'pipeline_template_in_publicly_accessible_project'],
+  ].each do |dm, fixture|
+    test "access show method for public #{dm} and expect to see page" do
+      Rails.configuration.anonymous_user_token = api_fixture('api_client_authorizations')['anonymous']['api_token']
+      get(:show, {uuid: api_fixture(dm)[fixture]['uuid']})
+      assert_response :redirect
+      if dm == 'groups'
+        assert_includes @response.redirect_url, "projects/#{api_fixture(dm)[fixture]['uuid']}"
+      else
+        assert_includes @response.redirect_url, "#{dm}/#{api_fixture(dm)[fixture]['uuid']}"
+      end
+    end
+  end
+
+  [
+    ['collections', 'foo_collection_in_aproject', 404],
+    ['groups', 'subproject_in_asubproject_with_same_name_as_one_in_active_user_home', 404],
+    ['jobs', 'job_with_latest_version', 404],
+    ['pipeline_instances', 'pipeline_owned_by_active_in_home', 404],
+    ['pipeline_templates', 'template_in_asubproject_with_same_name_as_one_in_active_user_home', 404],
+    ['traits', 'owned_by_aproject_with_no_name', :redirect],
+  ].each do |dm, fixture, expected|
+    test "access show method for non-public #{dm} and expect #{expected}" do
+      Rails.configuration.anonymous_user_token = api_fixture('api_client_authorizations')['anonymous']['api_token']
+      get(:show, {uuid: api_fixture(dm)[fixture]['uuid']})
+      assert_response expected
+      if expected == 404
+        assert_includes @response.inspect, 'Log in'
+      else
+        assert_match /\/users\/welcome/, @response.redirect_url
+      end
+    end
   end
 end
index 47213169d5b2d94b4129d6f2b324ffda6001d000..15d52da3c34f5f12a45091968d5b875f3909872f 100644 (file)
@@ -59,6 +59,7 @@ class ApplicationControllerTest < ActionController::TestCase
     [:preload_collections_for_objects, [] ],
     [:preload_log_collections_for_objects, [] ],
     [:preload_objects_for_dataclass, [] ],
+    [:preload_for_pdhs, [] ],
   ].each do |input|
     test "preload data for empty array input #{input}" do
       use_token :active
@@ -90,6 +91,8 @@ class ApplicationControllerTest < ActionController::TestCase
     [:preload_objects_for_dataclass, nil],
     [:object_for_dataclass, 'some_dataclass', nil],
     [:object_for_dataclass, nil, 'some_uuid'],
+    [:preload_for_pdhs, 'input not an array'],
+    [:preload_for_pdhs, nil],
   ].each do |input|
     test "preload data for wrong type input #{input}" do
       use_token :active
@@ -112,6 +115,7 @@ class ApplicationControllerTest < ActionController::TestCase
     [:collections_for_object, 'no-such-uuid' ],
     [:log_collections_for_object, 'no-such-uuid' ],
     [:object_for_dataclass, 'no-such-uuid' ],
+    [:collection_for_pdh, 'no-such-pdh' ],
   ].each do |input|
     test "get data for no such uuid #{input}" do
       use_token :active
@@ -125,6 +129,7 @@ class ApplicationControllerTest < ActionController::TestCase
         objects = ac.send input[0], input[1]
         assert objects, 'Expected objects'
         assert objects.is_a?(Array), 'Expected an array'
+        assert_empty objects
       end
     end
   end
@@ -300,6 +305,27 @@ class ApplicationControllerTest < ActionController::TestCase
     assert users.size == 3, 'Expected three objects in the preloaded hash'
   end
 
+  test "preload one collection each for given portable_data_hash list" do
+    use_token :active
+
+    ac = ApplicationController.new
+
+    pdh1 = api_fixture('collections')['foo_file']['portable_data_hash']
+    pdh2 = api_fixture('collections')['bar_file']['portable_data_hash']
+
+    pdhs = [pdh1, pdh2]
+    collections = ac.send :preload_for_pdhs, pdhs
+
+    assert collections, 'Expected collections map'
+    assert collections.is_a?(Hash), 'Expected a hash'
+    # Each pdh has more than one collection; however, we should get only one for each
+    assert collections.size == 2, 'Expected two objects in the preloaded collection hash'
+    assert collections[pdh1], "Expected collections for the passed-in pdh #{pdh1}"
+    assert_equal 1, collections[pdh1].size, "Expected one collection for the passed-in pdh #{pdh1}"
+    assert collections[pdh2], "Expected collections for the passed-in pdh #{pdh2}"
+    assert_equal 1, collections[pdh2].size, "Expected one collection for the passed-in pdh #{pdh2}"
+  end
+
   test "requesting a nonexistent object returns 404" do
     # We're really testing ApplicationController's find_object_by_uuid.
     # It's easiest to do that by instantiating a concrete controller.
@@ -362,4 +388,75 @@ class ApplicationControllerTest < ActionController::TestCase
       end
     end
   end
+
+  [
+    true,
+    false,
+  ].each do |config|
+    test "invoke show with include_accept_encoding_header config #{config}" do
+      Rails.configuration.include_accept_encoding_header_in_api_requests = config
+
+      @controller = CollectionsController.new
+      get(:show, {id: api_fixture('collections')['foo_file']['uuid']}, session_for(:admin))
+
+      assert_equal([['.', 'foo', 3]], assigns(:object).files)
+    end
+  end
+
+  test 'Edit name and verify that a duplicate is not created' do
+    @controller = ProjectsController.new
+    project = api_fixture("groups")["aproject"]
+    post :update, {
+      id: project["uuid"],
+      project: {
+        name: 'test name'
+      },
+      format: :json
+    }, session_for(:active)
+    assert_includes @response.body, 'test name'
+    updated = assigns(:object)
+    assert_equal updated.uuid, project["uuid"]
+    assert_equal 'test name', updated.name
+  end
+
+  [
+    [VirtualMachinesController.new, 'hostname', false],
+    [UsersController.new, 'first_name', true],
+  ].each do |controller, expect_str, expect_home_link|
+    test "access #{controller.controller_name} index as admin and verify Home link is#{' not' if !expect_home_link} shown" do
+      @controller = controller
+
+      get :index, {}, session_for(:admin)
+
+      assert_response 200
+      assert_includes @response.body, expect_str
+
+      home_link = "/projects/#{api_fixture('users')['active']['uuid']}"
+
+      if expect_home_link
+        refute_empty css_select("[href=\"/projects/#{api_fixture('users')['active']['uuid']}\"]")
+      else
+        assert_empty css_select("[href=\"/projects/#{api_fixture('users')['active']['uuid']}\"]")
+      end
+    end
+  end
+
+  [
+    [VirtualMachinesController.new, 'hostname', true],
+    [UsersController.new, 'first_name', false],
+  ].each do |controller, expect_str, expect_delete_link|
+    test "access #{controller.controller_name} index as admin and verify Delete option is#{' not' if !expect_delete_link} shown" do
+      @controller = controller
+
+      get :index, {}, session_for(:admin)
+
+      assert_response 200
+      assert_includes @response.body, expect_str
+      if expect_delete_link
+        refute_empty css_select('[data-method=delete]')
+      else
+        assert_empty css_select('[data-method=delete]')
+      end
+    end
+  end
 end
index 65349c6fbd39fda0a38d962604eefaab2888feb5..13644e00bdce28db3460aa2f722f679deb107c7e 100644 (file)
@@ -400,8 +400,39 @@ class CollectionsControllerTest < ActionController::TestCase
     }, session_for(:active)
     assert_response :success
     assert_not_nil assigns(:object)
+    # Ensure the Workbench response still has the original manifest_text
     assert_equal 'test description update', assigns(:object).description
-    assert_equal collection['manifest_text'], assigns(:object).manifest_text
+    assert_equal true, strip_signatures_and_compare(collection['manifest_text'], assigns(:object).manifest_text)
+    # Ensure the API server still has the original manifest_text after
+    # we called arvados.v1.collections.update
+    use_token :active do
+      assert_equal true, strip_signatures_and_compare(Collection.find(collection['uuid']).manifest_text,
+                                                      collection['manifest_text'])
+    end
+  end
+
+  # Since we get the initial collection from a fixture, its manifest_text has no signatures.
+  # However, a collection retrieved after an update or find will have a signed manifest_text.
+  # Hence, compare the manifests line by line after stripping the signatures.
+  def strip_signatures_and_compare m1, m2
+    m1_lines = m1.split "\n"
+    m2_lines = m2.split "\n"
+
+    return false if m1_lines.size != m2_lines.size
+
+    m1_lines.each_with_index do |line, i|
+      # drop everything from '+A' onward in each word, i.e. the signature
+      m1_words = line.split.map { |word| word.split('+A')[0] }
+      m2_words = m2_lines[i].split.map { |word| word.split('+A')[0] }
+      return false if m1_words != m2_words
+    end
+
+    return true
   end
 
   test "view collection and verify none of the file types listed are disabled" do
@@ -430,4 +461,57 @@ class CollectionsControllerTest < ActionController::TestCase
 
     assert_equal files.sort, disabled.sort, "Expected to see all collection files in disabled list of files"
   end
+
+  test "anonymous user accesses collection in shared project" do
+    Rails.configuration.anonymous_user_token =
+      api_fixture('api_client_authorizations')['anonymous']['api_token']
+    collection = api_fixture('collections')['public_text_file']
+    get(:show, {id: collection['uuid']})
+
+    response_object = assigns(:object)
+    assert_equal collection['name'], response_object['name']
+    assert_equal collection['uuid'], response_object['uuid']
+    assert_includes @response.body, 'Hello world'
+    assert_includes @response.body, 'Content address'
+    refute_nil css_select('[href="#Advanced"]')
+  end
+
+  test "can view empty collection" do
+    get :show, {id: 'd41d8cd98f00b204e9800998ecf8427e+0'}, session_for(:active)
+    assert_includes @response.body, 'The following collections have this content'
+  end
+
+  test "collection portable data hash redirect" do
+    di = api_fixture('collections')['docker_image']
+    get :show, {id: di['portable_data_hash']}, session_for(:active)
+    assert_match /\/collections\/#{di['uuid']}/, @response.redirect_url
+  end
+
+  test "collection portable data hash with multiple matches" do
+    pdh = api_fixture('collections')['foo_file']['portable_data_hash']
+    get :show, {id: pdh}, session_for(:admin)
+    matches = api_fixture('collections').select {|k,v| v["portable_data_hash"] == pdh}
+    assert matches.size > 1
+
+    matches.each do |k,v|
+      assert_match /href="\/collections\/#{v['uuid']}">.*#{v['name']}<\/a>/, @response.body
+    end
+
+    assert_includes @response.body, 'The following collections have this content:'
+    assert_not_includes @response.body, 'more results are not shown'
+    assert_not_includes @response.body, 'Activity'
+    assert_not_includes @response.body, 'Sharing and permissions'
+  end
+
+  test "collection page renders name" do
+    collection = api_fixture('collections')['foo_file']
+    get :show, {id: collection['uuid']}, session_for(:active)
+    assert_includes @response.body, collection['name']
+    assert_match /href="#{collection['uuid']}\/foo" ><\/i> foo</, @response.body
+  end
+
+  test "No Upload tab on non-writable collection" do
+    get :show, {id: api_fixture('collections')['user_agreement']['uuid']}, session_for(:active)
+    assert_not_includes @response.body, '<a href="#Upload"'
+  end
 end
index 609e58c1c2dd72a863dd14037ce3a1eb441d30a7..1e2643526de415df8ac20eb4cad9a510f8b90732 100644 (file)
@@ -1,4 +1,8 @@
 require 'test_helper'
 
 class JobsControllerTest < ActionController::TestCase
+  test "visit jobs index page" do
+    get :index, {}, session_for(:active)
+    assert_response :success
+  end
 end
index ec17e8e4222676e50c7b92266b31a6180e359763..3416cc0e61026c9ef5d4e7caee3baf4289095ca2 100644 (file)
@@ -138,6 +138,33 @@ class ProjectsControllerTest < ActionController::TestCase
     assert_equal api_fixture('users', 'subproject_admin')['uuid'], new_specimen.owner_uuid
   end
 
+  # An object that does not offer an expired_at field but has an xx_owner_uuid_name_unique
+  # constraint will be renamed when removed, if another object with the same name exists
+  # in the user's home project.
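+  # (The rename appends a marker, e.g. "<original name> removed from <timestamp>";
+  # the test below only checks for the " removed from " substring, not the exact suffix.)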
+  [
+    ['groups', 'subproject_in_asubproject_with_same_name_as_one_in_active_user_home'],
+    ['pipeline_templates', 'template_in_asubproject_with_same_name_as_one_in_active_user_home'],
+  ].each do |dm, fixture|
+    test "removing #{dm} from a subproject results in renaming it when there is another such object with same name in home project" do
+      object = api_fixture(dm, fixture)
+      delete(:remove_item,
+             { id: api_fixture('groups', 'asubproject')['uuid'],
+               item_uuid: object['uuid'],
+               format: 'js' },
+             session_for(:active))
+      assert_response :success
+      assert_match(/\b#{object['uuid']}\b/, @response.body,
+                   "removed object not named in response")
+      use_token :active
+      if dm == 'groups'
+        found = Group.find(object['uuid'])
+      else
+        found = PipelineTemplate.find(object['uuid'])
+      end
+      assert_equal api_fixture('users', 'active')['uuid'], found.owner_uuid
+      assert_includes found.name, object['name'] + ' removed from '
+    end
+  end
+
   test 'projects#show tab infinite scroll partial obeys limit' do
     get_contents_rows(limit: 1, filters: [['uuid','is_a',['arvados#job']]])
     assert_response :success
@@ -221,4 +248,174 @@ class ProjectsControllerTest < ActionController::TestCase
     assert_response :redirect
     assert_match /\/users\/welcome/, @response.redirect_url
   end
+
+  [
+    nil,
+    :active,
+  ].each do |user|
+    test "visit public projects page when anon config is enabled, as user #{user}, and expect page" do
+      Rails.configuration.anonymous_user_token = api_fixture('api_client_authorizations')['anonymous']['api_token']
+
+      if user
+        get :public, {}, session_for(user)
+      else
+        get :public
+      end
+
+      assert_response :success
+      assert_not_nil assigns(:objects)
+      project_names = assigns(:objects).collect(&:name)
+      assert_includes project_names, 'Unrestricted public data'
+      assert_not_includes project_names, 'A Project'
+      refute_empty css_select('[href="/projects/public"]')
+    end
+  end
+
+  test "visit public projects page when anon config is not enabled as active user and expect 404" do
+    get :public, {}, session_for(:active)
+    assert_response 404
+  end
+
+  test "visit public projects page when anon config is enabled but public projects page is disabled as active user and expect 404" do
+    Rails.configuration.anonymous_user_token = api_fixture('api_client_authorizations')['anonymous']['api_token']
+    Rails.configuration.enable_public_projects_page = false
+    get :public, {}, session_for(:active)
+    assert_response 404
+  end
+
+  test "visit public projects page when anon config is not enabled as anonymous and expect login page" do
+    get :public
+    assert_response :redirect
+    assert_match /\/users\/welcome/, @response.redirect_url
+    assert_empty css_select('[href="/projects/public"]')
+  end
+
+  test "visit public projects page when anon config is enabled and public projects page is disabled and expect login page" do
+    Rails.configuration.anonymous_user_token = api_fixture('api_client_authorizations')['anonymous']['api_token']
+    Rails.configuration.enable_public_projects_page = false
+    get :index
+    assert_response :redirect
+    assert_match /\/users\/welcome/, @response.redirect_url
+    assert_empty css_select('[href="/projects/public"]')
+  end
+
+  test "visit public projects page when anon config is not enabled and public projects page is enabled and expect login page" do
+    Rails.configuration.enable_public_projects_page = true
+    get :index
+    assert_response :redirect
+    assert_match /\/users\/welcome/, @response.redirect_url
+    assert_empty css_select('[href="/projects/public"]')
+  end
+
+  test "find a project and edit its description" do
+    project = api_fixture('groups')['aproject']
+    use_token :active
+    found = Group.find(project['uuid'])
+    found.description = 'test description update'
+    found.save!
+    get(:show, {id: project['uuid']}, session_for(:active))
+    assert_includes @response.body, 'test description update'
+  end
+
+  test "find a project and edit description to textile description" do
+    project = api_fixture('groups')['aproject']
+    use_token :active
+    found = Group.find(project['uuid'])
+    found.description = '*test bold description for textile formatting*'
+    found.save!
+    get(:show, {id: project['uuid']}, session_for(:active))
+    assert_includes @response.body, '<strong>test bold description for textile formatting</strong>'
+  end
+
+  test "find a project and edit description to html description" do
+    project = api_fixture('groups')['aproject']
+    use_token :active
+    found = Group.find(project['uuid'])
+    found.description = 'Textile description with link to home page <a href="/">take me home</a>.'
+    found.save!
+    get(:show, {id: project['uuid']}, session_for(:active))
+    assert_includes @response.body, 'Textile description with link to home page <a href="/">take me home</a>.'
+  end
+
+  test "find a project and edit description to textile description with link to object" do
+    project = api_fixture('groups')['aproject']
+    use_token :active
+    found = Group.find(project['uuid'])
+
+    # uses 'Link to object' as a hyperlink for the object
+    found.description = '"Link to object":' + api_fixture('groups')['asubproject']['uuid']
+    found.save!
+    get(:show, {id: project['uuid']}, session_for(:active))
+
+    # check that the input was rendered as a textile link rather than left as entered
+    refute_includes @response.body, '"Link to object"'
+    refute_empty css_select("[href=\"/groups/#{api_fixture('groups')['asubproject']['uuid']}\"]")
+  end
+
+  test "project viewer can't see project sharing tab" do
+    project = api_fixture('groups')['aproject']
+    get(:show, {id: project['uuid']}, session_for(:project_viewer))
+    refute_includes @response.body, '<div id="Sharing"'
+    assert_includes @response.body, '<div id="Data_collections"'
+  end
+
+  [
+    'admin',
+    'active',
+  ].each do |username|
+    test "#{username} can see project sharing tab" do
+     project = api_fixture('groups')['aproject']
+     get(:show, {id: project['uuid']}, session_for(username))
+     assert_includes @response.body, '<div id="Sharing"'
+     assert_includes @response.body, '<div id="Data_collections"'
+    end
+  end
+
+  [
+    ['admin',true],
+    ['active',true],
+    ['project_viewer',false],
+  ].each do |user, can_move|
+    test "#{user} can move subproject from project #{can_move}" do
+      get(:show, {id: api_fixture('groups')['aproject']['uuid']}, session_for(user))
+      if can_move
+        assert_includes @response.body, 'Move project...'
+      else
+        refute_includes @response.body, 'Move project...'
+      end
+    end
+  end
+
+  [
+    ["jobs", "/jobs"],
+    ["pipelines", "/pipeline_instances"],
+    ["collections", "/collections"],
+  ].each do |target,path|
+    test "test dashboard button all #{target}" do
+      get :index, {}, session_for(:active)
+      assert_includes @response.body, "href=\"#{path}\""
+      assert_includes @response.body, "All #{target}"
+    end
+  end
+
+  test "visit a public project and verify the public projects page link exists" do
+    Rails.configuration.anonymous_user_token = api_fixture('api_client_authorizations')['anonymous']['api_token']
+    uuid = api_fixture('groups')['anonymously_accessible_project']['uuid']
+    get :show, {id: uuid}
+    project = assigns(:object)
+    assert_equal uuid, project['uuid']
+    refute_empty css_select("[href=\"/projects/#{project['uuid']}\"]")
+    assert_includes @response.body, "<a href=\"/projects/public\">Public Projects</a>"
+  end
+
+  test 'all_projects unaffected by params after use by ProjectsController (#6640)' do
+    @controller = ProjectsController.new
+    project_uuid = api_fixture('groups')['aproject']['uuid']
+    get :index, {
+      filters: [['uuid', '<', project_uuid]].to_json,
+      limit: 0,
+      offset: 1000,
+    }, session_for(:active)
+    assert_select "#projects-menu + ul li.divider ~ li a[href=/projects/#{project_uuid}]"
+  end
 end
index f95bb7731fab4bd86888d18ded3a6af0e2a6eb6b..25bf55768529f58327e7f8861835516e0b0f5f06 100644 (file)
@@ -1,7 +1,9 @@
 require 'test_helper'
+require 'helpers/repository_stub_helper'
 require 'helpers/share_object_helper'
 
 class RepositoriesControllerTest < ActionController::TestCase
+  include RepositoryStubHelper
   include ShareObjectHelper
 
   [
@@ -62,4 +64,61 @@ class RepositoriesControllerTest < ActionController::TestCase
       end
     end
   end
+
+  ### Browse repository content
+
+  [:active, :spectator].each do |user|
+    test "show tree to #{user}" do
+      reset_api_fixtures_after_test false
+      sha1, _, _ = stub_repo_content
+      get :show_tree, {
+        id: api_fixture('repositories')['foo']['uuid'],
+        commit: sha1,
+      }, session_for(user)
+      assert_response :success
+      assert_select 'tr td a', 'COPYING'
+      assert_select 'tr td', '625 bytes'
+      assert_select 'tr td a', 'apps'
+      assert_select 'tr td a', 'workbench'
+      assert_select 'tr td a', 'Gemfile'
+      assert_select 'tr td', '33.7 KiB'
+    end
+
+    test "show commit to #{user}" do
+      reset_api_fixtures_after_test false
+      sha1, commit, _ = stub_repo_content
+      get :show_commit, {
+        id: api_fixture('repositories')['foo']['uuid'],
+        commit: sha1,
+      }, session_for(user)
+      assert_response :success
+      assert_select 'pre', h(commit)
+    end
+
+    test "show blob to #{user}" do
+      reset_api_fixtures_after_test false
+      sha1, _, filedata = stub_repo_content filename: 'COPYING'
+      get :show_blob, {
+        id: api_fixture('repositories')['foo']['uuid'],
+        commit: sha1,
+        path: 'COPYING',
+      }, session_for(user)
+      assert_response :success
+      assert_select 'pre', h(filedata)
+    end
+  end
+
+  ['', '/'].each do |path|
+    test "show tree with path '#{path}'" do
+      reset_api_fixtures_after_test false
+      sha1, _, _ = stub_repo_content filename: 'COPYING'
+      get :show_tree, {
+        id: api_fixture('repositories')['foo']['uuid'],
+        commit: sha1,
+        path: path,
+      }, session_for(:active)
+      assert_response :success
+      assert_select 'tr td', 'COPYING'
+    end
+  end
 end
index c1436da4545e93197c95d2b850614cf55c95cafc..71354b82f3bf59650b05e49adaa905318c45845a 100644 (file)
@@ -1,6 +1,7 @@
 require 'test_helper'
 
 class UsersControllerTest < ActionController::TestCase
+
   test "valid token works in controller test" do
     get :index, {}, session_for(:active)
     assert_response :success
@@ -31,7 +32,7 @@ class UsersControllerTest < ActionController::TestCase
   end
 
   test "show repositories with read, write, or manage permission" do
-    get :manage_account, {}, session_for(:active)
+    get :repositories, {id: api_fixture("users")['active']['uuid']}, session_for(:active)
     assert_response :success
     repos = assigns(:my_repositories)
     assert repos
@@ -42,7 +43,7 @@ class UsersControllerTest < ActionController::TestCase
   end
 
   test "show repositories lists linked as well as owned repositories" do
-    get :manage_account, {}, session_for(:active)
+    get :repositories, {id: api_fixture("users")['active']['uuid']}, session_for(:active)
     assert_response :success
     repos = assigns(:my_repositories)
     assert repos
@@ -74,4 +75,48 @@ class UsersControllerTest < ActionController::TestCase
     end
     assert_equal 1, found_email, "Expected 1 email after requesting shell access"
   end
+
+  [
+    'admin',
+    'active',
+  ].each do |username|
+    test "access users page as #{username} and verify show button is available" do
+      admin_user = api_fixture('users','admin')
+      active_user = api_fixture('users','active')
+      get :index, {}, session_for(username)
+      if username == 'admin'
+        assert_match /<a href="\/projects\/#{admin_user['uuid']}">Home<\/a>/, @response.body
+        assert_match /<a href="\/projects\/#{active_user['uuid']}">Home<\/a>/, @response.body
+        assert_match /href="\/users\/#{admin_user['uuid']}" title="show user"><i class="fa fa-fw fa-user"><\/i> Show<\/a/, @response.body
+        assert_match /href="\/users\/#{active_user['uuid']}" title="show user"><i class="fa fa-fw fa-user"><\/i> Show<\/a/, @response.body
+        assert_includes @response.body, admin_user['email']
+        assert_includes @response.body, active_user['email']
+      else
+        refute_match /Home<\/a>/, @response.body
+        refute_match /href="\/users\/#{admin_user['uuid']}" title="show user"><i class="fa fa-fw fa-user"><\/i> Show<\/a/, @response.body
+        assert_match /href="\/users\/#{active_user['uuid']}" title="show user"><i class="fa fa-fw fa-user"><\/i> Show<\/a/, @response.body
+        assert_includes @response.body, active_user['email']
+      end
+    end
+  end
+
+  [
+    'admin',
+    'active',
+  ].each do |username|
+    test "access settings drop down menu as #{username}" do
+      admin_user = api_fixture('users','admin')
+      active_user = api_fixture('users','active')
+      get :show, {
+        id: api_fixture('users')[username]['uuid']
+      }, session_for(username)
+      if username == 'admin'
+        assert_includes @response.body, admin_user['email']
+        refute_empty css_select('[id="system-menu"]')
+      else
+        assert_includes @response.body, active_user['email']
+        assert_empty css_select('[id="system-menu"]')
+      end
+    end
+  end
 end
index e7accad42335dfc16f4edfe3996516288f8b3a29..9d411147aef1c9dee5d98c5e965e9869747ca439 100644 (file)
@@ -1,6 +1,8 @@
 require 'test_helper'
 
 class CollectionsHelperTest < ActionView::TestCase
+  reset_api_fixtures :after_each_test, false
+
   [
     ["filename.csv", true],
     ["filename.fa", true],
@@ -25,6 +27,7 @@ class CollectionsHelperTest < ActionView::TestCase
     ["filename.yml", true],
 
     ["filename.bam", false],
+    ["filename.tar", false],
     ["filename", false],
   ].each do |file_name, preview_allowed|
     test "verify '#{file_name}' is allowed for preview #{preview_allowed}" do
diff --git a/apps/workbench/test/helpers/manifest_examples.rb b/apps/workbench/test/helpers/manifest_examples.rb
new file mode 120000 (symlink)
index 0000000..cb908ef
--- /dev/null
@@ -0,0 +1 @@
+../../../../services/api/test/helpers/manifest_examples.rb
\ No newline at end of file
diff --git a/apps/workbench/test/helpers/repository_stub_helper.rb b/apps/workbench/test/helpers/repository_stub_helper.rb
new file mode 100644 (file)
index 0000000..b7d0573
--- /dev/null
@@ -0,0 +1,33 @@
+module RepositoryStubHelper
+  # Supply some fake git content.
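+  # Returns [sha1, commit_text, file_content] so callers can assert against
+  # the same fake data that the stubbed Repository methods will serve.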
+  def stub_repo_content opts={}
+    fakesha1 = opts[:sha1] || 'abcdefabcdefabcdefabcdefabcdefabcdefabcd'
+    fakefilename = opts[:filename] || 'COPYING'
+    fakefilesrc = File.expand_path('../../../../../'+fakefilename, __FILE__)
+    fakefile = File.read fakefilesrc
+    fakecommit = <<-EOS
+      commit abcdefabcdefabcdefabcdefabcdefabcdefabcd
+      Author: Fake R <fake@example.com>
+      Date:   Wed Apr 1 11:59:59 2015 -0400
+
+          It's a fake commit.
+
+    EOS
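+    # Stub the tree listing in `git ls-tree -l -r` format:
+    # <mode> <type> <sha1> <size> <path>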
+    Repository.any_instance.stubs(:ls_tree_lr).with(fakesha1).returns <<-EOS
+      100644 blob eec475862e6ec2a87554e0fca90697e87f441bf5     226    .gitignore
+      100644 blob acbd7523ed49f01217874965aa3180cccec89d61     625    COPYING
+      100644 blob d645695673349e3947e8e5ae42332d0ac3164cd7   11358    LICENSE-2.0.txt
+      100644 blob c7a36c355b4a2b94dfab45c9748330022a788c91     622    README
+      100644 blob dba13ed2ddf783ee8118c6a581dbf75305f816a3   34520    agpl-3.0.txt
+      100644 blob 9bef02bbfda670595750fd99a4461005ce5b8f12     695    apps/workbench/.gitignore
+      100644 blob b51f674d90f68bfb50d9304068f915e42b04aea4    2249    apps/workbench/Gemfile
+      100755 blob cdd5ebaff27781f93ab85e484410c0ce9e97770f    1012    crunch_scripts/hash
+    EOS
+    Repository.any_instance.
+      stubs(:cat_file).with(fakesha1, fakefilename).returns fakefile
+    Repository.any_instance.
+      stubs(:show).with(fakesha1).returns fakecommit
+    return fakesha1, fakecommit, fakefile
+  end
+end
index ba09acc810dcf0e908cb9de811e07948d4dddf32..9d8f8d03252d036cf9dca696b833a1097f17e46b 100644 (file)
@@ -19,6 +19,7 @@ module ShareObjectHelper
       # Otherwise, the not-included assertions might falsely pass because
       # the modal hasn't loaded yet.
       find(".selectable", text: name).click
+      assert_text "Only #{share_type} you are allowed to access are shown"
       assert(has_no_selector?(".modal-dialog-preview-pane"),
              "preview pane available in sharing dialog")
       if share_type == 'users' and obj and obj['email']
diff --git a/apps/workbench/test/helpers/time_block.rb b/apps/workbench/test/helpers/time_block.rb
new file mode 120000 (symlink)
index 0000000..afb43e7
--- /dev/null
@@ -0,0 +1 @@
+../../../../services/api/test/helpers/time_block.rb
\ No newline at end of file
index 0fb21b2d2f063530d0d175e2229c46d35de70408..aabbf00dc206e77090d1d9096c3abed23eec8530 100644 (file)
@@ -18,13 +18,20 @@ class AnonymousAccessTest < ActionDispatch::IntegrationTest
       if user['is_active']
         assert_text 'Unrestricted public data'
         assert_selector 'a', text: 'Projects'
+        page.find("#projects-menu").click
+        within('.dropdown-menu') do
+          assert_selector 'a', text: 'Search all projects'
+          assert_selector "a[href=\"/projects/public\"]", text: 'Browse public projects'
+          assert_selector 'a', text: 'Add a new project'
+          assert_selector 'li[class="dropdown-header"]', text: 'My projects'
+        end
       else
         assert_text 'indicate that you have read and accepted the user agreement'
       end
       within('.navbar-fixed-top') do
         assert_selector 'a', text: Rails.configuration.site_name.downcase
-        assert_selector 'a', text: "#{user['email']}"
-        find('a', text: "#{user['email']}").click
+        assert(page.has_link?("notifications-menu"), 'no user menu')
+        page.find("#notifications-menu").click
         within('.dropdown-menu') do
           assert_selector 'a', text: 'Log out'
         end
@@ -35,6 +42,7 @@ class AnonymousAccessTest < ActionDispatch::IntegrationTest
         assert_text Rails.configuration.site_name.downcase
         assert_no_selector 'a', text: Rails.configuration.site_name.downcase
         assert_selector 'a', text: 'Log in'
+        assert_selector 'a', text: 'Browse public projects'
       end
     end
   end
@@ -62,8 +70,8 @@ class AnonymousAccessTest < ActionDispatch::IntegrationTest
     assert_selector 'a', text: 'Data collections'
     assert_selector 'a', text: 'Jobs and pipelines'
     assert_selector 'a', text: 'Pipeline templates'
+    assert_selector 'a', text: 'Subprojects'
     assert_selector 'a', text: 'Advanced'
-    assert_no_selector 'a', text: 'Subprojects'
     assert_no_selector 'a', text: 'Other objects'
     assert_no_selector 'button', text: 'Add data'
 
@@ -140,6 +148,7 @@ class AnonymousAccessTest < ActionDispatch::IntegrationTest
     within first('tr', text: look_for) do
       click_link 'Show'
     end
+    assert_text 'Public Projects Unrestricted public data'
     assert_text 'script_version'
 
     assert_text 'zzzzz-tpzed-xurymjxw79nv3jz' # modified by user
@@ -156,6 +165,7 @@ class AnonymousAccessTest < ActionDispatch::IntegrationTest
     end
 
     # in pipeline instance page
+    assert_text 'Public Projects Unrestricted public data'
     assert_text 'This pipeline is complete'
     assert_no_selector 'a', text: 'Re-run with latest'
     assert_no_selector 'a', text: 'Re-run options'
@@ -176,7 +186,131 @@ class AnonymousAccessTest < ActionDispatch::IntegrationTest
     end
 
     # in template page
+    assert_text 'Public Projects Unrestricted public data'
     assert_text 'script version'
     assert_no_selector 'a', text: 'Run this pipeline'
   end
+
+  test "anonymous user accesses subprojects tab in shared project" do
+    visit PUBLIC_PROJECT + '#Subprojects'
+
+    assert_text 'Subproject in anonymous accessible project'
+
+    within first('tr[data-kind="arvados#group"]') do
+      click_link 'Show'
+    end
+
+    # in subproject
+    assert_text 'Description for subproject in anonymous accessible project'
+  end
+
+  [
+    ['pipeline_in_publicly_accessible_project', true],
+    ['pipeline_in_publicly_accessible_project_but_other_objects_elsewhere', false],
+    ['pipeline_in_publicly_accessible_project_but_other_objects_elsewhere', false, 'spectator'],
+    ['pipeline_in_publicly_accessible_project_but_other_objects_elsewhere', true, 'admin'],
+
+    ['completed_job_in_publicly_accessible_project', true],
+    ['job_in_publicly_accessible_project_but_other_objects_elsewhere', false],
+  ].each do |fixture, objects_readable, user=nil|
+    test "access #{fixture} in public project with objects readable=#{objects_readable} with user #{user}" do
+      pipeline_page = true if fixture.include?('pipeline')
+
+      if pipeline_page
+        object = api_fixture('pipeline_instances')[fixture]
+        page = "/pipeline_instances/#{object['uuid']}"
+        expect_log_text = "Log for foo"
+      else  # job
+        object = api_fixture('jobs')[fixture]
+        page = "/jobs/#{object['uuid']}"
+        expect_log_text = "stderr crunchstat"
+      end
+
+      if user
+        visit page_with_token user, page
+      else
+        visit page
+      end
+
+      # click job link, if in pipeline page
+      click_link 'foo' if pipeline_page
+
+      if objects_readable
+        assert_selector 'a[href="#Log"]', text: 'Log'
+        assert_no_selector 'a[data-toggle="disabled"]', text: 'Log'
+        assert_no_text 'Output data not available'
+        if pipeline_page
+          assert_text 'This pipeline was created from'
+          assert_selector 'a', text: object['components']['foo']['job']['uuid']
+          # We'd like to test the Log tab on job pages too, but we can't right
+          # now because Poltergeist 1.x doesn't support JavaScript's
+          # Function.prototype.bind, which is used by job_log_graph.js.
+          click_link "Log"
+          assert_text expect_log_text
+        end
+      else
+        assert_selector 'a[data-toggle="disabled"]', text: 'Log'
+        assert_text 'Output data not available'
+        assert_text object['job']
+        if pipeline_page
+          assert_no_text 'This pipeline was created from'  # template is not readable
+          assert_no_selector 'a', text: object['components']['foo']['job']['uuid']
+        end
+        click_link "Log"
+        assert_text 'Output data not available'
+        assert_no_text expect_log_text
+      end
+    end
+  end
+
+  [
+    ['new_pipeline_in_publicly_accessible_project', true],
+    ['new_pipeline_in_publicly_accessible_project', true, 'spectator'],
+    ['new_pipeline_in_publicly_accessible_project_but_other_objects_elsewhere', false],
+    ['new_pipeline_in_publicly_accessible_project_but_other_objects_elsewhere', false, 'spectator'],
+    ['new_pipeline_in_publicly_accessible_project_but_other_objects_elsewhere', true, 'admin'],
+    ['new_pipeline_in_publicly_accessible_project_with_dataclass_file_and_other_objects_elsewhere', false],
+    ['new_pipeline_in_publicly_accessible_project_with_dataclass_file_and_other_objects_elsewhere', false, 'spectator'],
+    ['new_pipeline_in_publicly_accessible_project_with_dataclass_file_and_other_objects_elsewhere', true, 'admin'],
+  ].each do |fixture, objects_readable, user=nil|
+    test "access #{fixture} in public project with objects readable=#{objects_readable} with user #{user}" do
+      object = api_fixture('pipeline_instances')[fixture]
+      page = "/pipeline_instances/#{object['uuid']}"
+      if user
+        visit page_with_token user, page
+      else
+        visit page
+      end
+
+      # click Components tab
+      click_link 'Components'
+
+      if objects_readable
+        assert_text 'This pipeline was created from'
+        if user == 'admin'
+          assert_text 'input'
+          assert_selector 'a', text: 'Choose'
+          assert_selector 'a', text: 'Run'
+          assert_no_selector 'a.disabled', text: 'Run'
+        else
+          assert_selector 'a', text: object['components']['foo']['script_parameters']['input']['value']
+          if user
+            assert_selector 'a', text: 'Run'
+          else
+            assert_no_selector 'a', text: 'Run'
+          end
+        end
+      else
+        assert_no_text 'This pipeline was created from'  # template is not readable
+        input = object['components']['foo']['script_parameters']['input']['value']
+        assert_no_selector 'a', text: input
+        if user
+          input = input.gsub('/', '\\/')
+          assert_text "One or more inputs provided are not readable"
+          assert_selector "input[type=text][value=#{input}]"
+          assert_selector 'a.disabled', text: 'Run'
+        else
+          assert_no_text "One or more inputs provided are not readable"
+          assert_text input
+          assert_no_selector 'a', text: 'Run'
+        end
+      end
+    end
+  end
 end
index 84acdc61148e91b14a1b365928dec786a5f612e9..61ba16294f85f5d7bacaf02486b3f83a2e253fac 100644 (file)
@@ -24,7 +24,10 @@ class ApplicationLayoutTest < ActionDispatch::IntegrationTest
       else
         assert page.has_link?("Projects"), 'Not found link - Projects'
         page.find("#projects-menu").click
-        assert page.has_text?('Projects shared with me'), 'Not found text - Project shared with me'
+        assert_selector 'a', text: 'Search all projects'
+        assert_no_selector 'a', text: 'Browse public projects'
+        assert_selector 'a', text: 'Add a new project'
+        assert_selector 'li[class="dropdown-header"]', text: 'My projects'
       end
     elsif invited
       assert page.has_text?('Please check the box below to indicate that you have read and accepted the user agreement'), 'Not found text - Please check the box below . . .'
@@ -40,22 +43,30 @@ class ApplicationLayoutTest < ActionDispatch::IntegrationTest
       else
         # my account menu
         assert_selector 'a', text: Rails.configuration.site_name.downcase
-        assert page.has_link?("#{user['email']}"), 'Not found link - email'
-        find('a', text: "#{user['email']}").click
+        assert(page.has_link?("notifications-menu"), 'no user menu')
+        page.find("#notifications-menu").click
         within('.dropdown-menu') do
           if user['is_active']
             assert page.has_no_link?('Not active'), 'Found link - Not active'
             assert page.has_no_link?('Sign agreements'), 'Found link - Sign agreements'
 
-            assert page.has_link?('Manage account'), 'No link - Manage account'
+            assert_selector "a[href=\"/projects/#{user['uuid']}\"]", text: 'Home project'
+            assert_selector "a[href=\"/users/#{user['uuid']}/virtual_machines\"]", text: 'Virtual machines'
+            assert_selector "a[href=\"/users/#{user['uuid']}/repositories\"]", text: 'Repositories'
+            assert_selector "a[href=\"/current_token\"]", text: 'Current token'
+            assert_selector "a[href=\"/users/#{user['uuid']}/ssh_keys\"]", text: 'SSH keys'
 
             if profile_config
-              assert page.has_link?('Manage profile'), 'No link - Manage profile'
+              assert_selector "a[href=\"/users/#{user['uuid']}/profile\"]", text: 'Manage profile'
             else
-              assert page.has_no_link?('Manage profile'), 'Found link - Manage profile'
+              assert_no_selector "a[href=\"/users/#{user['uuid']}/profile\"]", text: 'Manage profile'
             end
           else
-            assert page.has_no_link?('Manage account'), 'Found link - Manage account'
+            assert_no_selector 'a', text: 'Home project'
+            assert page.has_no_link?('Virtual machines'), 'Found link - Virtual machines'
+            assert page.has_no_link?('Repositories'), 'Found link - Repositories'
+            assert page.has_no_link?('Current token'), 'Found link - Current token'
+            assert page.has_no_link?('SSH keys'), 'Found link - SSH keys'
             assert page.has_no_link?('Manage profile'), 'Found link - Manage profile'
           end
           assert page.has_link?('Log out'), 'No link - Log out'
@@ -69,6 +80,8 @@ class ApplicationLayoutTest < ActionDispatch::IntegrationTest
     within('.navbar-fixed-top') do
       page.find("#arv-help").click
       within('.dropdown-menu') do
+        assert_selector 'a', text: 'Getting Started ...'
+        assert_selector 'a', text: 'Public Pipelines and Data sets'
         assert page.has_link?('Tutorials and User guide'), 'No link - Tutorials and User guide'
         assert page.has_link?('API Reference'), 'No link - API Reference'
         assert page.has_link?('SDK Reference'), 'No link - SDK Reference'
@@ -106,7 +119,8 @@ class ApplicationLayoutTest < ActionDispatch::IntegrationTest
     ['active', api_fixture('users')['active'], true, true],
     ['admin', api_fixture('users')['admin'], true, true],
     ['active_no_prefs', api_fixture('users')['active_no_prefs'], true, false],
-    ['active_no_prefs_profile', api_fixture('users')['active_no_prefs_profile'], true, false],
+    ['active_no_prefs_profile_no_getting_started_shown',
+        api_fixture('users')['active_no_prefs_profile_no_getting_started_shown'], true, false],
   ].each do |token, user, invited, has_profile|
 
     test "visit home page for user #{token}" do
@@ -139,4 +153,100 @@ class ApplicationLayoutTest < ActionDispatch::IntegrationTest
       verify_system_menu user
     end
   end
+
+  test "test getting started help menu item" do
+    visit page_with_token('active')
+    within '.navbar-fixed-top' do
+      find('.help-menu > a').click
+      find('.help-menu .dropdown-menu a', text: 'Getting Started ...').click
+    end
+
+    within '.modal-content' do
+      assert_text 'Getting Started'
+      assert_selector 'button:not([disabled])', text: 'Next'
+      assert_no_selector 'button:not([disabled])', text: 'Prev'
+
+      # Use Next button to enable Prev button
+      click_button 'Next'
+      assert_selector 'button:not([disabled])', text: 'Prev'  # Prev button is now enabled
+      click_button 'Prev'
+      assert_no_selector 'button:not([disabled])', text: 'Prev'  # Prev button is again disabled
+
+      # Click Next until the last page is reached, then verify that it is disabled
+      20.times do   # currently there are only 4 pages; we don't expect more than 20 in the future
+        click_button 'Next'
+        begin
+          find('button:not([disabled])', text: 'Next')
+        rescue Capybara::ElementNotFound
+          break
+        end
+      end
+      assert_no_selector 'button:not([disabled])', text: 'Next'  # Next button is disabled
+      assert_selector 'button:not([disabled])', text: 'Prev'     # Prev button is enabled
+      click_button 'Prev'
+      assert_selector 'button:not([disabled])', text: 'Next'     # Next button is now enabled
+
+      first('button', text: 'x').click
+    end
+    assert_text 'Active pipelines' # seeing dashboard now
+  end
+
+  test "test arvados_public_data_doc_url config unset" do
+    Rails.configuration.arvados_public_data_doc_url = false
+
+    visit page_with_token('active')
+    within '.navbar-fixed-top' do
+      find('.help-menu > a').click
+
+      assert_no_selector 'a', text: 'Public Pipelines and Data sets'
+
+      assert_selector 'a', text: 'Getting Started ...'
+      assert page.has_link?('Tutorials and User guide'), 'No link - Tutorials and User guide'
+      assert page.has_link?('API Reference'), 'No link - API Reference'
+      assert page.has_link?('SDK Reference'), 'No link - SDK Reference'
+      assert page.has_link?('Show version / debugging info ...'), 'No link - Show version / debugging info'
+      assert page.has_link?('Report a problem ...'), 'No link - Report a problem'
+    end
+  end
+
+  test "no SSH public key notification when shell_in_a_box_url is configured" do
+    Rails.configuration.shell_in_a_box_url = 'example.com'
+    visit page_with_token('job_reader')
+    click_link 'notifications-menu'
+    assert_no_selector 'a', text: 'Click here to set up an SSH public key for use with Arvados.'
+    assert_selector 'a', text: 'Click here to learn how to run an Arvados Crunch pipeline'
+  end
+
+  [
+    ['Repositories', nil, 's0uqq'],
+    ['Virtual machines', 'virtual machine', 'current_user_logins'],
+    ['SSH keys', nil, 'public_key'],
+    ['Links', 'link', 'link_class'],
+    ['Groups', 'group', 'group_class'],
+    ['Compute nodes', 'node', 'info[ping_secret'],
+    ['Keep services', 'keep service', 'service_ssl_flag'],
+    ['Keep disks', 'keep disk', 'bytes_free'],
+  ].each do |page_name, add_button_text, look_for|
+    test "test system menu #{page_name} link" do
+      visit page_with_token('admin')
+      within('.navbar-fixed-top') do
+        page.find("#system-menu").click
+        within('.dropdown-menu') do
+          assert_selector 'a', text: page_name
+          find('a', text: page_name).click
+        end
+      end
+
+      # click the add button if it exists
+      if add_button_text
+        assert_selector 'button', text: "Add a new #{add_button_text}"
+        find('button', text: "Add a new #{add_button_text}").click
+      else
+        assert_no_selector 'button', text:"Add a new"
+      end
+
+      # look for unique property in the current page
+      assert_text look_for
+    end
+  end
 end
index 6960d3bc899d47bdfe929719f88dd24615db3f98..62efee4d67e6b4e5a84e2340bcc55902b18ba30d 100644 (file)
@@ -28,13 +28,6 @@ class CollectionUploadTest < ActionDispatch::IntegrationTest
     assert_selector 'div#Upload.active div.panel'
   end
 
-  test "No Upload tab on non-writable collection" do
-    need_javascript
-    visit(page_with_token 'active',
-          '/collections/'+api_fixture('collections')['user_agreement']['uuid'])
-    assert_no_selector '.nav-tabs Upload'
-  end
-
   test "Upload two empty files with the same name" do
     need_selenium "to make file uploads work"
     visit page_with_token 'active', sandbox_path
index 4a7014c37db83639e34ae01dbe5e0a013d41041c..4f66e9d6b58bf4e9402a95ab21192268fa9edd1a 100644 (file)
@@ -1,20 +1,11 @@
 require 'integration_helper'
+require_relative 'integration_test_utils'
 
 class CollectionsTest < ActionDispatch::IntegrationTest
   setup do
     need_javascript
   end
 
-  # check_checkboxes_state asserts that the page holds at least one
-  # checkbox matching 'selector', and that all matching checkboxes
-  # are in state 'checkbox_status' (i.e. checked if true, unchecked otherwise)
-  def assert_checkboxes_state(selector, checkbox_status, msg=nil)
-    assert page.has_selector?(selector)
-    page.all(selector).each do |checkbox|
-      assert(checkbox.checked? == checkbox_status, msg)
-    end
-  end
-
   test "Can copy a collection to a project" do
     collection_uuid = api_fixture('collections')['foo_file']['uuid']
     collection_name = api_fixture('collections')['foo_file']['name']
@@ -29,17 +20,6 @@ class CollectionsTest < ActionDispatch::IntegrationTest
     assert_text "Copy of #{collection_name}"
   end
 
-  test "Collection page renders name" do
-    Capybara.current_driver = :rack_test
-    uuid = api_fixture('collections')['foo_file']['uuid']
-    coll_name = api_fixture('collections')['foo_file']['name']
-    visit page_with_token('active', "/collections/#{uuid}")
-    assert(page.has_text?(coll_name), "Collection page did not include name")
-    # Now check that the page is otherwise normal, and the collection name
-    # isn't only showing up in an error message.
-    assert(page.has_link?('foo'), "Collection page did not include file link")
-  end
-
   def check_sharing(want_state, link_regexp)
     # We specifically want to click buttons.  See #4291.
     if want_state == :off
@@ -100,13 +80,6 @@ class CollectionsTest < ActionDispatch::IntegrationTest
     end
   end
 
-  test "can view empty collection" do
-    Capybara.current_driver = :rack_test
-    uuid = 'd41d8cd98f00b204e9800998ecf8427e+0'
-    visit page_with_token('active', "/collections/#{uuid}")
-    assert page.has_text?(/This collection is empty|The following collections have this content/)
-  end
-
   test "combine selected collections into new collection" do
     foo_collection = api_fixture('collections')['foo_file']
     bar_collection = api_fixture('collections')['bar_file']
@@ -200,29 +173,16 @@ class CollectionsTest < ActionDispatch::IntegrationTest
     assert(page.has_text?('file2_in_subdir4.txt'), 'file not found - file1_in_subdir4.txt')
   end
 
-  test "Collection portable data hash redirect" do
-    di = api_fixture('collections')['docker_image']
-    visit page_with_token('active', "/collections/#{di['portable_data_hash']}")
-
-    # check redirection
-    assert current_path.end_with?("/collections/#{di['uuid']}")
-    assert page.has_text?("docker_image")
-    assert page.has_text?("Activity")
-    assert page.has_text?("Sharing and permissions")
-  end
-
-  test "Collection portable data hash with multiple matches" do
+  test "Collection portable data hash with multiple matches with more than one page of results" do
     pdh = api_fixture('collections')['baz_file']['portable_data_hash']
     visit page_with_token('admin', "/collections/#{pdh}")
 
-    matches = api_fixture('collections').select {|k,v| v["portable_data_hash"] == pdh}
-    assert matches.size > 1
+    assert_selector 'a', text: 'Collection_1'
 
-    matches.each do |k,v|
-      assert page.has_link?(v["name"]), "Page /collections/#{pdh} should contain link '#{v['name']}'"
-    end
-    assert page.has_no_text?("Activity")
-    assert page.has_no_text?("Sharing and permissions")
+    assert_text 'The following collections have this content:'
+    assert_text 'more results are not shown'
+    assert_no_text 'Activity'
+    assert_no_text 'Sharing and permissions'
   end
 
   test "Filtering collection files by regexp" do
index 32f16a68dfa9bb7da49f0adbce9881aa0480aeda..f2067a92bfdb27233b59fbc4ddb730156c5ce8c8 100644 (file)
@@ -9,7 +9,7 @@ class ErrorsTest < ActionDispatch::IntegrationTest
 
   test "error page renders user navigation" do
     visit(page_with_token("active", "/collections/#{BAD_UUID}"))
-    assert(page.has_text?(api_fixture("users")["active"]["email"]),
+    assert(page.has_link?("notifications-menu"),
            "User information missing from error page")
     assert(page.has_no_text?(/log ?in/i),
            "Logged in user prompted to log in on error page")
@@ -17,7 +17,7 @@ class ErrorsTest < ActionDispatch::IntegrationTest
 
   test "no user navigation with expired token" do
     visit(page_with_token("expired", "/collections/#{BAD_UUID}"))
-    assert(page.has_no_text?(api_fixture("users")["active"]["email"]),
+    assert(page.has_no_link?("notifications-menu"),
            "Page visited with expired token included user information")
     assert(page.has_selector?("a", text: /log ?in/i),
            "Login prompt missing on expired token error page")
@@ -51,10 +51,10 @@ class ErrorsTest < ActionDispatch::IntegrationTest
   # it has a too-limited token, these tests will need to be adjusted.
   test "API error page includes error token" do
     start_stamp = now_timestamp
-    visit(page_with_token("active_readonly", "/authorized_keys"))
-    click_on "Add a new authorized key"
+    visit(page_with_token("active_readonly", "/groups"))
+    click_on "Add a new group"
     assert(page.has_text?(/fiddlesticks/i),
-           "Not on an error page after making an SSH key out of scope")
+           "Not on an error page after making a group out of scope")
     assert(page_has_error_token?(start_stamp), "no error token on 404 page")
   end
 
@@ -79,50 +79,45 @@ class ErrorsTest < ActionDispatch::IntegrationTest
   end
 
   test "API error page has Report problem button" do
+    # point to a bad api server url to generate fiddlesticks error
     original_arvados_v1_base = Rails.configuration.arvados_v1_base
+    Rails.configuration.arvados_v1_base = "https://[::1]:1/"
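+    # ([::1]:1 should refuse the connection immediately, without waiting for a timeout)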
 
-    begin
-      # point to a bad api server url to generate fiddlesticks error
-      Rails.configuration.arvados_v1_base = "https://[100::f]:1/"
+    visit page_with_token("active")
 
-      visit page_with_token("active")
+    assert_text 'fiddlesticks'
 
-      assert_text 'fiddlesticks'
+    # reset the api server base config so the popup can render
+    Rails.configuration.arvados_v1_base = original_arvados_v1_base
 
-      # reset api server base config to let the popup rendering to work
-      Rails.configuration.arvados_v1_base = original_arvados_v1_base
+    click_link 'Report problem'
 
-      click_link 'Report problem'
+    within '.modal-content' do
+      assert_text 'Report a problem'
+      assert_no_text 'Version / debugging info'
+      assert_text 'Describe the problem'
+      assert_text 'Send problem report'
+      # "Send" button should be disabled until text is entered
+      assert_no_selector 'a,button:not([disabled])', text: 'Send problem report'
+      assert_selector 'a,button', text: 'Cancel'
 
-      within '.modal-content' do
-        assert_text 'Report a problem'
-        assert_no_text 'Version / debugging info'
-        assert_text 'Describe the problem'
-        assert_text 'Send problem report'
-        # "Send" button should be disabled until text is entered
-        assert_no_selector 'a,button:not([disabled])', text: 'Send problem report'
-        assert_selector 'a,button', text: 'Cancel'
+      report = mock
+      report.expects(:deliver).returns true
+      IssueReporter.expects(:send_report).returns report
 
-        report = mock
-        report.expects(:deliver).returns true
-        IssueReporter.expects(:send_report).returns report
+      # enter a report text and click on report
+      find_field('report_issue_text').set 'my test report text'
+      click_button 'Send problem report'
 
-        # enter a report text and click on report
-        find_field('report_issue_text').set 'my test report text'
-        click_button 'Send problem report'
-
-        # ajax success updated button texts and added footer message
-        assert_no_selector 'a,button', text: 'Send problem report'
-        assert_no_selector 'a,button', text: 'Cancel'
-        assert_text 'Report sent'
-        assert_text 'Thanks for reporting this issue'
-        click_button 'Close'
-      end
-
-      # out of the popup now and should be back in the error page
-      assert_text 'fiddlesticks'
-    ensure
-      Rails.configuration.arvados_v1_base = original_arvados_v1_base
+      # ajax success updated button texts and added footer message
+      assert_no_selector 'a,button', text: 'Send problem report'
+      assert_no_selector 'a,button', text: 'Cancel'
+      assert_text 'Report sent'
+      assert_text 'Thanks for reporting this issue'
+      click_button 'Close'
     end
+
+    # out of the popup now and should be back in the error page
+    assert_text 'fiddlesticks'
   end
 end
diff --git a/apps/workbench/test/integration/integration_test_utils.rb b/apps/workbench/test/integration/integration_test_utils.rb
new file mode 100644 (file)
index 0000000..3fe5800
--- /dev/null
@@ -0,0 +1,12 @@
+# This file is used to define methods reusable by two or more integration tests
+#
+
+# assert_checkboxes_state asserts that the page holds at least one
+# checkbox matching 'selector', and that all matching checkboxes
+# are in state 'checkbox_status' (i.e. checked if true, unchecked otherwise)
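+#
+# Example:
+#   assert_checkboxes_state('input[type=checkbox]', true, 'expected all checkboxes to be checked')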
+def assert_checkboxes_state(selector, checkbox_status, msg=nil)
+  assert page.has_selector?(selector)
+  page.all(selector).each do |checkbox|
+    assert(checkbox.checked? == checkbox_status, msg)
+  end
+end
index 29bccd9d76b20846cd0d6ce7519b2858a81bcd5a..2cae500027aeebd8f79270a4ba26f61e4496373f 100644 (file)
@@ -95,10 +95,10 @@ class JobsTest < ActionDispatch::IntegrationTest
 
       # Re-running jobs doesn't currently work because the test API
       # server has no git repository to check against.  For now, check
-      # that the correct script version is mentioned in the
-      # Fiddlesticks error message.
+      # that the error message says something appropriate for that
+      # situation.
       if expect_options && use_latest
-        assert_text "Script version #{job['supplied_script_version']} does not resolve to a commit"
+        assert_text "077ba2ad3ea24a929091a9e6ce545c93199b8e57"
       else
         assert_text "Script version #{job['script_version']} does not resolve to a commit"
       end
index f2916741cb28fbc0cdee25ed85093a47a0b45f9e..b6bf700d0948a1eb85a8ed00fe0c30fc6ae6e037 100644 (file)
@@ -469,7 +469,7 @@ class PipelineInstancesTest < ActionDispatch::IntegrationTest
       page_text = page.text
 
       if run_time
-        match = /This pipeline started at (.*)\. It failed after (.*) seconds at (.*)\. Check the Log/.match page_text
+        match = /This pipeline started at (.*)\. It failed after (.*) at (.*)\. Check the Log/.match page_text
       else
         match = /This pipeline started at (.*). It has been active for(.*)/.match page_text
       end
@@ -541,4 +541,45 @@ class PipelineInstancesTest < ActionDispatch::IntegrationTest
     visit page_with_token 'active', '/pipeline_instances/' + pi['uuid']
     assert_text 'Queued for '
   end
+
+  test "job logs linked for running pipeline" do
+    pi = api_fixture("pipeline_instances", "running_pipeline_with_complete_job")
+    visit(page_with_token("active", "/pipeline_instances/#{pi['uuid']}"))
+    click_on "Log"
+    within "#Log" do
+      assert_text "Log for previous"
+      log_link = find("a", text: "Log for previous")
+      assert_includes(log_link[:href],
+                      pi["components"]["previous"]["job"]["log"])
+      assert_selector "#event_log_div"
+    end
+  end
+
+  test "job logs linked for complete pipeline" do
+    pi = api_fixture("pipeline_instances", "complete_pipeline_with_two_jobs")
+    visit(page_with_token("active", "/pipeline_instances/#{pi['uuid']}"))
+    click_on "Log"
+    within "#Log" do
+      assert_text "Log for previous"
+      pi["components"].each do |cname, cspec|
+        log_link = find("a", text: "Log for #{cname}")
+        assert_includes(log_link[:href], cspec["job"]["log"])
+      end
+      assert_no_selector "#event_log_div"
+    end
+  end
+
+  test "job logs linked for failed pipeline" do
+    pi = api_fixture("pipeline_instances", "failed_pipeline_with_two_jobs")
+    visit(page_with_token("active", "/pipeline_instances/#{pi['uuid']}"))
+    click_on "Log"
+    within "#Log" do
+      assert_text "Log for previous"
+      pi["components"].each do |cname, cspec|
+        log_link = find("a", text: "Log for #{cname}")
+        assert_includes(log_link[:href], cspec["job"]["log"])
+      end
+      assert_no_selector "#event_log_div"
+    end
+  end
 end
index 6c9bd6698efb118851da8a11056037652950f0b5..64a547108bc3e50157a0bebed23392728c83ee45 100644 (file)
@@ -1,5 +1,6 @@
 require 'integration_helper'
 require 'helpers/share_object_helper'
+require_relative 'integration_test_utils'
 
 class ProjectsTest < ActionDispatch::IntegrationTest
   include ShareObjectHelper
@@ -35,109 +36,6 @@ class ProjectsTest < ActionDispatch::IntegrationTest
            "Description update did not survive page refresh")
   end
 
-  test 'Find a project and edit description to textile description' do
-    visit page_with_token 'active', '/'
-    find("#projects-menu").click
-    find(".dropdown-menu a", text: "A Project").click
-    within('.container-fluid', text: api_fixture('groups')['aproject']['name']) do
-      find('span', text: api_fixture('groups')['aproject']['name']).click
-      within('.arv-description-as-subtitle') do
-        find('.fa-pencil').click
-        find('.editable-input textarea').set('<p>*Textile description for A project* - "take me home":/ </p><p>And a new paragraph in description.</p>')
-        find('.editable-submit').click
-      end
-      wait_for_ajax
-    end
-
-    # visit project page
-    visit current_path
-    assert_no_text '*Textile description for A project*'
-    assert(find?('.container-fluid', text: 'Textile description for A project'),
-           "Description update did not survive page refresh")
-    assert(find?('.container-fluid', text: 'And a new paragraph in description'),
-           "Description did not contain the expected new paragraph")
-    assert(page.has_link?("take me home"), "link not found in description")
-
-    click_link 'take me home'
-
-    # now in dashboard
-    assert(page.has_text?('Active pipelines'), 'Active pipelines - not found on dashboard')
-  end
-
-  test 'Find a project and edit description to html description' do
-    visit page_with_token 'active', '/'
-    find("#projects-menu").click
-    find(".dropdown-menu a", text: "A Project").click
-    within('.container-fluid', text: api_fixture('groups')['aproject']['name']) do
-      find('span', text: api_fixture('groups')['aproject']['name']).click
-      within('.arv-description-as-subtitle') do
-        find('.fa-pencil').click
-        find('.editable-input textarea').set('<br>Textile description for A project</br> - <a href="/">take me home</a>')
-        find('.editable-submit').click
-      end
-      wait_for_ajax
-    end
-    visit current_path
-    assert(find?('.container-fluid', text: 'Textile description for A project'),
-           "Description update did not survive page refresh")
-    assert(!find?('.container-fluid', text: '<br>Textile description for A project</br>'),
-           "Textile description is displayed with uninterpreted formatting characters")
-    assert(page.has_link?("take me home"),"link not found in description")
-    click_link 'take me home'
-    assert page.has_text?('Active pipelines')
-  end
-
-  test 'Find a project and edit description to textile description with link to object' do
-    visit page_with_token 'active', '/'
-    find("#projects-menu").click
-    find(".dropdown-menu a", text: "A Project").click
-    within('.container-fluid', text: api_fixture('groups')['aproject']['name']) do
-      find('span', text: api_fixture('groups')['aproject']['name']).click
-      within('.arv-description-as-subtitle') do
-        find('.fa-pencil').click
-        find('.editable-input textarea').set('*Textile description for A project* - "go to sub-project":' + api_fixture('groups')['asubproject']['uuid'] + "'")
-        find('.editable-submit').click
-      end
-      wait_for_ajax
-    end
-    visit current_path
-    assert(find?('.container-fluid', text: 'Textile description for A project'),
-           "Description update did not survive page refresh")
-    assert(!find?('.container-fluid', text: '*Textile description for A project*'),
-           "Textile description is displayed with uninterpreted formatting characters")
-    assert(page.has_link?("go to sub-project"), "link not found in description")
-    click_link 'go to sub-project'
-    assert(page.has_text?(api_fixture('groups')['asubproject']['name']), 'sub-project name not found after clicking link')
-  end
-
-  test 'Add a new name, then edit it, without creating a duplicate' do
-    project_uuid = api_fixture('groups')['aproject']['uuid']
-    specimen_uuid = api_fixture('traits')['owned_by_aproject_with_no_name']['uuid']
-    visit page_with_token 'active', '/projects/' + project_uuid
-    click_link 'Other objects'
-    within '.selection-action-container' do
-      # Wait for the tab to load:
-      assert_selector 'tr[data-kind="arvados#trait"]'
-      within first('tr', text: 'Trait') do
-        find(".fa-pencil").click
-        find('.editable-input input').set('Now I have a name.')
-        find('.glyphicon-ok').click
-        assert_selector '.editable', text: 'Now I have a name.'
-        find(".fa-pencil").click
-        find('.editable-input input').set('Now I have a new name.')
-        find('.glyphicon-ok').click
-      end
-      wait_for_ajax
-      assert_selector '.editable', text: 'Now I have a new name.'
-    end
-    visit current_path
-    click_link 'Other objects'
-    within '.selection-action-container' do
-      find '.editable', text: 'Now I have a new name.'
-      assert_no_selector '.editable', text: 'Now I have a name.'
-    end
-  end
-
   test 'Create a project and move it into a different project' do
     visit page_with_token 'active', '/projects'
     find("#projects-menu").click
@@ -201,12 +99,6 @@ class ProjectsTest < ActionDispatch::IntegrationTest
                     text: group_name("anonymous_group"))
   end
 
-  test "project viewer can't see project sharing tab" do
-    show_object_using('project_viewer', 'groups', 'aproject', 'A Project')
-    assert(page.has_no_link?("Sharing"),
-           "read-only project user sees sharing tab")
-  end
-
   test "project owner can manage sharing for another user" do
     add_user = api_fixture('users')['future_project_user']
     new_name = ["first_name", "last_name"].map { |k| add_user[k] }.join(" ")
@@ -490,9 +382,7 @@ class ProjectsTest < ActionDispatch::IntegrationTest
       my_project = api_fixture('groups')['aproject']
       my_collection = api_fixture('collections')['collection_to_move_around_in_aproject']
 
-      visit page_with_token user, '/'
-      find("#projects-menu").click
-      find(".dropdown-menu a", text: my_project['name']).click
+      visit page_with_token user, "/projects/#{my_project['uuid']}"
       click_link 'Data collections'
       assert page.has_text?(my_collection['name']), 'Collection not found in project'
 
@@ -516,18 +406,6 @@ class ProjectsTest < ActionDispatch::IntegrationTest
     end
   end
 
-  [
-    ["jobs", "/jobs"],
-    ["pipelines", "/pipeline_instances"],
-    ["collections", "/collections"]
-  ].each do |target,path|
-    test "Test dashboard button all #{target}" do
-      visit page_with_token 'active', '/'
-      click_link "All #{target}"
-      assert_equal path, current_path
-    end
-  end
-
   def scroll_setup(project_name,
                    total_nbr_items,
                    item_list_parameter,
@@ -650,33 +528,13 @@ class ProjectsTest < ActionDispatch::IntegrationTest
     end
   end
 
-  # Move button accessibility
-  [
-    ['admin', true],
-    ['active', true],  # project owner
-    ['project_viewer', false],
-    ].each do |user, can_move|
-    test "#{user} can move subproject under another user's Home #{can_move}" do
-      project = api_fixture('groups')['aproject']
-      collection = api_fixture('collections')['collection_to_move_around_in_aproject']
-
-      # verify the project move button
-      visit page_with_token user, "/projects/#{project['uuid']}"
-      if can_move
-        assert page.has_link? 'Move project...'
-      else
-        assert page.has_no_link? 'Move project...'
-      end
-    end
-  end
-
   test "error while loading tab" do
     original_arvados_v1_base = Rails.configuration.arvados_v1_base
 
     visit page_with_token 'active', '/projects/' + api_fixture('groups')['aproject']['uuid']
 
     # Point to a bad api server url to generate error
-    Rails.configuration.arvados_v1_base = "https://[100::f]:1/"
+    Rails.configuration.arvados_v1_base = "https://[::1]:1/"
     click_link 'Other objects'
     within '#Other_objects' do
       # Error
@@ -722,4 +580,133 @@ class ProjectsTest < ActionDispatch::IntegrationTest
     find("#page-wrapper .nav-tabs :first-child a").click
     assert_text("Collection modified at")
   end
+
+  # "Select all" and "Unselect all" options
+  test "select all and unselect all actions" do
+    need_selenium 'to check and uncheck checkboxes'
+
+    visit page_with_token 'active', '/projects/' + api_fixture('groups')['aproject']['uuid']
+
+    # Go to "Data collections" tab and click on "Select all"
+    click_link 'Data collections'
+    wait_for_ajax
+
+    # Initially, all selection options for this tab should be disabled
+    click_button 'Selection'
+    within('.selection-action-container') do
+      assert_selector 'li.disabled', text: 'Create new collection with selected collections'
+      assert_selector 'li.disabled', text: 'Copy selected'
+    end
+
+    # Select all
+    click_button 'Select all'
+
+    assert_checkboxes_state('input[type=checkbox]', true, '"select all" should check all checkboxes')
+
+    # Now the selection options should be enabled
+    click_button 'Selection'
+    within('.selection-action-container') do
+      assert_selector 'li', text: 'Create new collection with selected collections'
+      assert_no_selector 'li.disabled', text: 'Copy selected'
+    end
+
+    # Go to Jobs and pipelines tab and assert none selected
+    click_link 'Jobs and pipelines'
+    wait_for_ajax
+
+    # Since this is the first visit to this tab, all selection options should be disabled
+    click_button 'Selection'
+    within('.selection-action-container') do
+      assert_selector 'li.disabled', text: 'Create new collection with selected collections'
+      assert_selector 'li.disabled', text: 'Copy selected'
+    end
+
+    assert_checkboxes_state('input[type=checkbox]', false, 'first visit should leave all checkboxes unchecked')
+
+    # Select all
+    click_button 'Select all'
+    assert_checkboxes_state('input[type=checkbox]', true, '"select all" should check all checkboxes')
+
+    # Applicable selection options should be enabled
+    click_button 'Selection'
+    within('.selection-action-container') do
+      assert_selector 'li.disabled', text: 'Create new collection with selected collections'
+      assert_selector 'li', text: 'Copy selected'
+      assert_no_selector 'li.disabled', text: 'Copy selected'
+    end
+
+    # Unselect all
+    click_button 'Unselect all'
+    assert_checkboxes_state('input[type=checkbox]', false, '"unselect all" should clear all checkboxes')
+
+    # All selection options should be disabled again
+    click_button 'Selection'
+    within('.selection-action-container') do
+      assert_selector 'li.disabled', text: 'Create new collection with selected collections'
+      assert_selector 'li.disabled', text: 'Copy selected'
+    end
+
+    # Go back to Data collections tab and verify all are still selected
+    click_link 'Data collections'
+    wait_for_ajax
+
+    # Selection options should be enabled because all collections in this tab are still selected
+    click_button 'Selection'
+    within('.selection-action-container') do
+      assert_selector 'li', text: 'Create new collection with selected collections'
+      assert_no_selector 'li.disabled', text: 'Copy selected'
+    end
+
+    assert_checkboxes_state('input[type=checkbox]', true, '"select all" should check all checkboxes')
+
+    # Unselect all
+    find('button#unselect-all').click
+    assert_checkboxes_state('input[type=checkbox]', false, '"unselect all" should clear all checkboxes')
+
+    # Now all selection options should be disabled because none of the collections are checked
+    click_button 'Selection'
+    within('.selection-action-container') do
+      assert_selector 'li.disabled', text: 'Create new collection with selected collections'
+      assert_selector 'li.disabled', text: 'Copy selected'
+    end
+
+    # Verify checking just one checkbox still works as expected
+    within('tr', text: api_fixture('collections')['collection_to_move_around_in_aproject']['name']) do
+      find('input[type=checkbox]').click
+    end
+
+    click_button 'Selection'
+    within('.selection-action-container') do
+      assert_selector 'li', text: 'Create new collection with selected collections'
+      assert_no_selector 'li.disabled', text: 'Copy selected'
+    end
+  end
+
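+  # assert_checkboxes_state (used above) comes from the shared Workbench test
+  # helpers; a minimal sketch of the behavior this test assumes (not part of
+  # this change) would be:
+  #
+  #   def assert_checkboxes_state(selector, checked, msg=nil)
+  #     all(selector).each do |checkbox|
+  #       assert_equal checked, checkbox.checked?, msg
+  #     end
+  #   end
+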
+  test "test search all projects menu item in projects menu" do
+     need_selenium
+     visit page_with_token('active')
+     find('#projects-menu').click
+     within('.dropdown-menu') do
+       assert_selector 'a', text: 'Search all projects'
+       find('a', text: 'Search all projects').click
+     end
+     within('.modal-content') do
+        assert page.has_text?('All projects'), 'No text - All projects'
+        assert page.has_text?('Search'), 'No text - Search'
+        assert page.has_text?('Cancel'), 'No text - Cancel'
+        fill_in "Search", with: 'Unrestricted public data'
+        wait_for_ajax
+        assert_selector 'div', text: 'Unrestricted public data'
+        find(:xpath, '//*[@id="choose-scroll"]/div[2]/div').click
+        click_button 'Show'
+     end
+     assert page.has_text?('Unrestricted public data'), 'No text - Unrestricted public data'
+     assert page.has_text?('An anonymously accessible project'), 'No text - An anonymously accessible project'
+  end
 end
index 4a15851636fba6f0502f741d181af42727a921af..4e1920a939de88d67ffb7cabee8f40ae3c482caa 100644 (file)
@@ -84,7 +84,8 @@ class ReportIssueTest < ActionDispatch::IntegrationTest
     ['active', api_fixture('users')['active']],
     ['admin', api_fixture('users')['admin']],
     ['active_no_prefs', api_fixture('users')['active_no_prefs']],
-    ['active_no_prefs_profile', api_fixture('users')['active_no_prefs_profile']],
+    ['active_no_prefs_profile_no_getting_started_shown',
+        api_fixture('users')['active_no_prefs_profile_no_getting_started_shown']],
   ].each do |token, user|
 
     test "check version info and report issue for user #{token}" do
diff --git a/apps/workbench/test/integration/repositories_browse_test.rb b/apps/workbench/test/integration/repositories_browse_test.rb
new file mode 100644 (file)
index 0000000..a6a85b5
--- /dev/null
@@ -0,0 +1,53 @@
+require 'integration_helper'
+require 'helpers/repository_stub_helper'
+require 'helpers/share_object_helper'
+
+class RepositoriesTest < ActionDispatch::IntegrationTest
+  include RepositoryStubHelper
+  include ShareObjectHelper
+
+  reset_api_fixtures :after_each_test, false
+
+  setup do
+    need_javascript
+  end
+
+  test "browse repository from jobs#show" do
+    sha1 = api_fixture('jobs')['running']['script_version']
+    _, fakecommit, fakefile =
+      stub_repo_content sha1: sha1, filename: 'crunch_scripts/hash'
+    show_object_using 'active', 'jobs', 'running', sha1
+    click_on api_fixture('jobs')['running']['script']
+    assert_text fakefile
+    click_on 'crunch_scripts'
+    assert_selector 'td a', text: 'hash'
+    click_on 'foo'
+    assert_selector 'td a', text: 'crunch_scripts'
+    click_on sha1
+    assert_text fakecommit
+
+    show_object_using 'active', 'jobs', 'running', sha1
+    click_on 'active/foo'
+    assert_selector 'td a', text: 'crunch_scripts'
+
+    show_object_using 'active', 'jobs', 'running', sha1
+    click_on sha1
+    assert_text fakecommit
+  end
+
+  test "browse using arv-git-http" do
+    repo = api_fixture('repositories')['foo']
+    portfile =
+      File.expand_path('../../../../../tmp/arv-git-httpd-ssl.port', __FILE__)
+    gitsslport = File.read(portfile)
+    Repository.any_instance.
+      stubs(:http_fetch_url).
+      returns "https://localhost:#{gitsslport}/#{repo['name']}.git"
+    commit_sha1 = '1de84a854e2b440dc53bf42f8548afa4c17da332'
+    visit page_with_token('active', "/repositories/#{repo['uuid']}/commit/#{commit_sha1}")
+    assert_text "Date:   Tue Mar 18 15:55:28 2014 -0400"
+    visit page_with_token('active', "/repositories/#{repo['uuid']}/tree/#{commit_sha1}")
+    assert_selector "tbody td a", "foo"
+    assert_text "12 bytes"
+  end
+end
index 05c7f25185f92a4bba3aafc9d820f49bd0f9d26b..14d28d2adc7ac7b79436ede9b32bb32ac4c92371 100644 (file)
@@ -8,17 +8,20 @@ class SearchBoxTest < ActionDispatch::IntegrationTest
   # test the search box
   def verify_search_box user
     if user && user['is_active']
-      # let's search for a valid uuid
+      aproject_uuid = api_fixture('groups')['aproject']['uuid']
+      # let's search for aproject by uuid
       within('.navbar-fixed-top') do
         page.has_field?('search')
-        page.find_field('search').set user['uuid']
+        page.find_field('search').set aproject_uuid
         page.find('.glyphicon-search').click
       end
 
-      # we should now be in the user's home project as a result of search
-      assert_selector "#Data_collections[data-object-uuid='#{user['uuid']}']", "Expected to be in user page after search click"
+      # we should now be in aproject as a result of search
+      assert_selector 'a', text:'Data collections'
+      click_link 'Data collections'
+      assert_selector "#Data_collections[data-object-uuid='#{aproject_uuid}']", "Expected to be in user page after search click"
 
-      # let's search again for an invalid valid uuid
+      # let's search again for an invalid uuid
       within('.navbar-fixed-top') do
         search_for = String.new user['uuid']
         search_for[0]='1'
index accc6a7963ef5a9a8dee8ad188f0c8552d9d070d..241a3cc1d8f4659d85b35a4c60b1b14ceab1efb5 100644 (file)
@@ -24,26 +24,40 @@ class UserProfileTest < ActionDispatch::IntegrationTest
         assert page.has_no_text?('Save profile'), 'Found text - Save profile'
       end
     elsif invited
-      assert page.has_text?('Please check the box below to indicate that you have read and accepted the user agreement'), 'Not found text - Please check the box below . . .'
+      assert page.has_text?('Please check the box below to indicate that you have read and accepted the user agreement'),
+        'Not found text - Please check the box below . . .'
       assert page.has_no_text?('Save profile'), 'Found text - Save profile'
     else
       assert page.has_text?('Your account is inactive'), 'Not found text - Your account is inactive'
       assert page.has_no_text?('Save profile'), 'Found text - Save profile'
     end
 
+    # If the user has not already seen getting_started modal, it will be shown on first visit.
+    if user and user['is_active'] and !user['prefs']['getting_started_shown']
+      within '.modal-content' do
+        assert_text 'Getting Started'
+        assert_selector 'button', text: 'Next'
+        assert_selector 'button', text: 'Prev'
+        first('button', text: 'x').click
+      end
+    end
+
     within('.navbar-fixed-top') do
       if !user
         assert page.has_link?('Log in'), 'Not found link - Log in'
       else
         # my account menu
-        assert page.has_link?("#{user['email']}"), 'Not found link - email'
-        find('a', text: "#{user['email']}").click
+        assert(page.has_link?("notifications-menu"), 'no user menu')
+        page.find("#notifications-menu").click
         within('.dropdown-menu') do
           if user['is_active']
             assert page.has_no_link?('Not active'), 'Found link - Not active'
             assert page.has_no_link?('Sign agreements'), 'Found link - Sign agreements'
 
-            assert page.has_link?('Manage account'), 'No link - Manage account'
+            assert page.has_link?('Virtual machines'), 'No link - Virtual machines'
+            assert page.has_link?('Repositories'), 'No link - Repositories'
+            assert page.has_link?('Current token'), 'No link - Current token'
+            assert page.has_link?('SSH keys'), 'No link - SSH Keys'
 
             if profile_config
               assert page.has_link?('Manage profile'), 'No link - Manage profile'
@@ -98,7 +112,11 @@ class UserProfileTest < ActionDispatch::IntegrationTest
     click_button "Save profile"
     # profile saved and in profile page now with success
     assert page.has_text?('Thank you for filling in your profile'), 'No text - Thank you for filling'
-    click_link 'Back to work!'
+    if user['prefs']['getting_started_shown']
+      click_link 'Back to work!'
+    else
+      click_link 'Get started'
+    end
 
     # profile saved and in home page now
     assert page.has_text?('Active pipelines'), 'No text - Active pipelines'
@@ -111,7 +129,10 @@ class UserProfileTest < ActionDispatch::IntegrationTest
     ['active', api_fixture('users')['active'], true, true],
     ['admin', api_fixture('users')['admin'], true, true],
     ['active_no_prefs', api_fixture('users')['active_no_prefs'], true, false],
-    ['active_no_prefs_profile', api_fixture('users')['active_no_prefs_profile'], true, false],
+    ['active_no_prefs_profile_no_getting_started_shown',
+      api_fixture('users')['active_no_prefs_profile_no_getting_started_shown'], true, false],
+    ['active_no_prefs_profile_with_getting_started_shown',
+      api_fixture('users')['active_no_prefs_profile_with_getting_started_shown'], true, false],
   ].each do |token, user, invited, has_profile|
 
     test "visit home page when profile is configured for user #{token}" do
similarity index 59%
rename from apps/workbench/test/integration/user_manage_account_test.rb
rename to apps/workbench/test/integration/user_settings_menu_test.rb
index 9b5e5d61e17ec0495cc1dc3dffc19e5395db8604..147b48702b7856def5ed1c7ed565481d6c77ad41 100644 (file)
@@ -1,30 +1,30 @@
 require 'integration_helper'
 
-class UserManageAccountTest < ActionDispatch::IntegrationTest
+class UserSettingsMenuTest < ActionDispatch::IntegrationTest
   setup do
     need_javascript
   end
 
-  # test manage_account page
-  def verify_manage_account user
+  # test user settings menu
+  def verify_user_settings_menu user
     if user['is_active']
       within('.navbar-fixed-top') do
-        find('a', text: "#{user['email']}").click
+        page.find("#notifications-menu").click
         within('.dropdown-menu') do
-          find('a', text: 'Manage account').click
+          assert_selector 'a', text: 'Virtual machines'
+          assert_selector 'a', text: 'Repositories'
+          assert_selector 'a', text: 'Current token'
+          assert_selector 'a', text: 'SSH keys'
+          find('a', text: 'SSH keys').click
         end
       end
 
-      # now in manage account page
-      assert page.has_text?('Virtual Machines'), 'No text - Virtual Machines'
-      assert page.has_text?('Repositories'), 'No text - Repositories'
-      assert page.has_text?('SSH Keys'), 'No text - SSH Keys'
-      assert page.has_text?('Current Token'), 'No text - Current Token'
-      assert page.has_text?('The Arvados API token is a secret key that enables the Arvados SDKs to access Arvados'), 'No text - Arvados API token'
+      # now in SSH Keys page
+      assert page.has_text?('Add new SSH key'), 'No text - Add SSH key'
       add_and_verify_ssh_key
     else  # inactive user
       within('.navbar-fixed-top') do
-        find('a', text: "#{user['email']}").click
+        page.find("#notifications-menu").click
         within('.dropdown-menu') do
           assert page.has_no_link?('Manage profile'), 'Found link - Manage profile'
         end
@@ -42,13 +42,13 @@ class UserManageAccountTest < ActionDispatch::IntegrationTest
 
         page.find_field('public_key').set 'first test with an incorrect ssh key value'
         click_button 'Submit'
-        assert page.has_text?('Public key does not appear to be a valid ssh-rsa or dsa public key'), 'No text - Public key does not appear to be a valid'
+        assert_text 'Public key does not appear to be a valid ssh-rsa or dsa public key'
 
         public_key_str = api_fixture('authorized_keys')['active']['public_key']
         page.find_field('public_key').set public_key_str
         page.find_field('name').set 'added_in_test'
         click_button 'Submit'
-        assert page.has_text?('Public key already exists in the database, use a different key.'), 'No text - Public key already exists'
+        assert_text 'Public key already exists in the database, use a different key.'
 
         new_key = SSHKey.generate
         page.find_field('public_key').set new_key.ssh_public_key
@@ -57,7 +57,7 @@ class UserManageAccountTest < ActionDispatch::IntegrationTest
       end
 
       # key must be added. look for it in the refreshed page
-      assert page.has_text?('added_in_test'), 'No text - added_in_test'
+      assert_text 'added_in_test'
   end
 
   [
@@ -66,9 +66,9 @@ class UserManageAccountTest < ActionDispatch::IntegrationTest
     ['active', api_fixture('users')['active']],
     ['admin', api_fixture('users')['admin']],
   ].each do |token, user|
-    test "test manage account for user #{token}" do
+    test "test user settings menu for user #{token}" do
       visit page_with_token(token)
-      verify_manage_account user
+      verify_user_settings_menu user
     end
   end
 
@@ -84,7 +84,7 @@ class UserManageAccountTest < ActionDispatch::IntegrationTest
     ['job_reader', :ssh, :pipeline],
     ['active'],
   ].each do |user, *expect|
-    test "manage account for #{user} with notifications #{expect.inspect}" do
+    test "user settings menu for #{user} with notifications #{expect.inspect}" do
       Rails.configuration.anonymous_user_token = false
       visit page_with_token(user)
       click_link 'notifications-menu'
@@ -110,8 +110,7 @@ class UserManageAccountTest < ActionDispatch::IntegrationTest
   end
 
   test "verify repositories for active user" do
-    visit page_with_token('active', '/manage_account')
-
+    visit page_with_token('active',"/users/#{api_fixture('users')['active']['uuid']}/repositories")
     repos = [[api_fixture('repositories')['foo'], true, true],
              [api_fixture('repositories')['repository3'], false, false],
              [api_fixture('repositories')['repository4'], true, false]]
@@ -136,7 +135,7 @@ class UserManageAccountTest < ActionDispatch::IntegrationTest
 
   test "request shell access" do
     ActionMailer::Base.deliveries = []
-    visit page_with_token('spectator', '/manage_account')
+    visit page_with_token('spectator', "/users/#{api_fixture('users')['spectator']['uuid']}/virtual_machines")
     assert_text 'You do not have access to any virtual machines'
     click_link 'Send request for shell access'
 
@@ -162,13 +161,72 @@ class UserManageAccountTest < ActionDispatch::IntegrationTest
     # Revisit the page and verify the request sent message along with
     # the request button.
     within('.navbar-fixed-top') do
-      find('a', text: 'spectator').click
+      page.find("#notifications-menu").click
       within('.dropdown-menu') do
-        find('a', text: 'Manage account').click
+        find('a', text: 'Virtual machines').click
       end
     end
     assert_text 'You do not have access to any virtual machines.'
     assert_text 'A request for shell access was sent on '
     assert_selector 'a', text: 'Send request for shell access'
   end
+
+  test "create new repository" do
+    visit page_with_token("active_trustedclient")
+    within('.navbar-fixed-top') do
+      page.find("#notifications-menu").click
+      within('.dropdown-menu') do
+        assert_selector 'a', text: 'Repositories'
+        find('a', text: 'Repositories').click
+      end
+    end
+    click_on "Add new repository"
+    within ".modal-dialog" do
+      fill_in "Name", with: "workbenchtest"
+      click_on "Create"
+    end
+    assert_text ":active/workbenchtest.git"
+    assert_match /git@git.*:active\/workbenchtest.git/, page.text
+    assert_match /https:\/\/git.*\/active\/workbenchtest.git/, page.text
+  end
+
+  [
+    ['virtual_machines', nil, 'Host name', 'testvm2.shell'],
+    ['repositories', 'Add new repository', 'It may take a minute or two before you can clone your new repository.', 'active/foo'],
+    ['/current_token', nil, 'HISTIGNORE=$HISTIGNORE', 'ARVADOS_API_TOKEN=3kg6k6lzmp9kj5'],
+    ['ssh_keys', 'Add new SSH key', 'Click here to learn about SSH keys in Arvados.', 'active'],
+  ].each do |page_name, button_name, look_for, content|
+    test "test user settings menu for page #{page_name}" do
+      if page_name == '/current_token'
+        visit page_with_token('active', page_name)
+      else
+        visit page_with_token('active', "/users/#{api_fixture('users')['active']['uuid']}/#{page_name}")
+      end
+
+      assert page.has_text? content
+      if button_name
+        assert_selector 'a', text: button_name
+        find('a', text: button_name).click
+      end
+
+      assert page.has_text? look_for
+    end
+  end
+
+  [
+    ['virtual_machines', 'You do not have access to any virtual machines.'],
+    ['repositories', 'You do not seem to have access to any repositories.'],
+    ['/current_token', 'HISTIGNORE=$HISTIGNORE'],
+    ['ssh_keys', 'You have not yet set up an SSH public key for use with Arvados.'],
+  ].each do |page_name, look_for|
+    test "test user settings menu for page #{page_name} when page is empty" do
+      if page_name == '/current_token'
+        visit page_with_token('user1_with_load', page_name)
+      else
+        visit page_with_token('admin', "/users/#{api_fixture('users')['user1_with_load']['uuid']}/#{page_name}")
+      end
+
+      assert page.has_text? look_for
+    end
+  end
 end
index 80e6a71932efb5098aac750499c3654fc44c6cf8..1ae302c23947c2968d194fdb006bcdaf3561be04 100644 (file)
@@ -56,7 +56,6 @@ class UsersTest < ActionDispatch::IntegrationTest
     within '.modal-content' do
       find 'label', text: 'Virtual Machine'
       fill_in "email", :with => "foo@example.com"
-      fill_in "repo_name", :with => "test_repo"
       click_button "Submit"
       wait_for_ajax
     end
@@ -81,7 +80,6 @@ class UsersTest < ActionDispatch::IntegrationTest
 
     click_link 'Advanced'
     click_link 'Metadata'
-    assert page.has_text? 'Repository: test_repo'
     assert !(page.has_text? 'VirtualMachine:')
   end
 
@@ -102,11 +100,10 @@ class UsersTest < ActionDispatch::IntegrationTest
     click_link 'Admin'
     assert page.has_text? 'As an admin, you can setup'
 
-    click_link 'Setup Active User'
+    click_link 'Setup shell account for Active User'
 
     within '.modal-content' do
       find 'label', text: 'Virtual Machine'
-      fill_in "repo_name", :with => "test_repo"
       click_button "Submit"
     end
 
@@ -115,16 +112,17 @@ class UsersTest < ActionDispatch::IntegrationTest
 
     click_link 'Advanced'
     click_link 'Metadata'
-    assert page.has_text? 'Repository: test_repo'
-    assert !(page.has_text? 'VirtualMachine:')
+    vm_links = all("a", text: "VirtualMachine:")
+    assert_equal(1, vm_links.size)
+    assert_equal("VirtualMachine: testvm2.shell", vm_links.first.text)
 
     # Click on Setup button again and this time also choose a VM
     click_link 'Admin'
-    click_link 'Setup Active User'
+    click_link 'Setup shell account for Active User'
 
     within '.modal-content' do
-      fill_in "repo_name", :with => "second_test_repo"
       select("testvm.shell", :from => 'vm_uuid')
+      fill_in "groups", :with => "test group one, test-group-two"
       click_button "Submit"
     end
 
@@ -133,8 +131,8 @@ class UsersTest < ActionDispatch::IntegrationTest
 
     click_link 'Advanced'
     click_link 'Metadata'
-    assert page.has_text? 'Repository: second_test_repo'
     assert page.has_text? 'VirtualMachine: testvm.shell'
+    assert page.has_text? '["test group one", "test-group-two"]'
   end
 
   test "unsetup active user" do
@@ -181,16 +179,13 @@ class UsersTest < ActionDispatch::IntegrationTest
 
     click_link 'Advanced'
     click_link 'Metadata'
-    assert !(page.has_text? 'Repository: test_repo')
-    assert !(page.has_text? 'Repository: second_test_repo')
-    assert !(page.has_text? 'VirtualMachine: testvm.shell')
+    assert page.has_no_text? 'VirtualMachine: testvm.shell'
 
     # setup user again and verify links present
     click_link 'Admin'
-    click_link 'Setup Active User'
+    click_link 'Setup shell account for Active User'
 
     within '.modal-content' do
-      fill_in "repo_name", :with => "second_test_repo"
       select("testvm.shell", :from => 'vm_uuid')
       click_button "Submit"
     end
@@ -200,44 +195,6 @@ class UsersTest < ActionDispatch::IntegrationTest
 
     click_link 'Advanced'
     click_link 'Metadata'
-    assert page.has_text? 'Repository: second_test_repo'
     assert page.has_text? 'VirtualMachine: testvm.shell'
   end
-
-  [
-    ['admin', false],
-    ['active', true],
-  ].each do |username, expect_show_button|
-    test "login as #{username} and access show button #{expect_show_button}" do
-      need_javascript
-
-      user = api_fixture('users', username)
-
-      visit page_with_token(username, '/users')
-
-      if expect_show_button
-        within('tr', text: user['uuid']) do
-          assert_text user['email']
-          assert_selector 'a', text: 'Show'
-          find('a', text: 'Show').click
-        end
-        assert_selector 'a', 'Data collections'
-      else
-        # no 'Show' button in the admin user's own row
-        within('tr', text: user['uuid']) do
-          assert_text user['email']
-          assert_no_selector 'a', text: 'Show'
-        end
-
-        # but the admin user can access 'Show' button for other users
-        active_user = api_fixture('users', 'active')
-        within('tr', text: active_user['uuid']) do
-          assert_text active_user['email']
-          assert_selector 'a', text: 'Show'
-          find('a', text: 'Show').click
-          assert_selector 'a', 'Attributes'
-        end
-      end
-    end
-  end
 end
diff --git a/apps/workbench/test/integration_performance/collection_unit_test.rb b/apps/workbench/test/integration_performance/collection_unit_test.rb
new file mode 100644 (file)
index 0000000..6cf14b5
--- /dev/null
@@ -0,0 +1,71 @@
+require 'test_helper'
+require 'helpers/manifest_examples'
+require 'helpers/time_block'
+
+class Blob
+end
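+# (Blob is declared empty here purely as a stub target: the collection code
+#  under test calls Blob.sign_locator, which is not otherwise defined in
+#  Workbench -- assumed rationale.)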
+
+class BigCollectionTest < ActiveSupport::TestCase
+  include ManifestExamples
+
+  setup do
+    Blob.stubs(:sign_locator).returns 'd41d8cd98f00b204e9800998ecf8427e+0'
+  end
+
+  teardown do
+    Thread.current[:arvados_api_client] = nil
+  end
+
+  # You can try with compress=false here too, but at last check it
+  # didn't make a significant difference.
+  [true].each do |compress|
+    test "crud cycle for collection with big manifest (compress=#{compress})" do
+      Rails.configuration.api_response_compression = compress
+      Thread.current[:arvados_api_client] = nil
+      crudtest
+    end
+  end
+
+  def crudtest
+    use_token :active
+    bigmanifest = time_block 'build example' do
+      make_manifest(streams: 100,
+                    files_per_stream: 100,
+                    blocks_per_file: 20,
+                    bytes_per_block: 0)
+    end
+    c = time_block "new (manifest size = #{bigmanifest.length>>20}MiB)" do
+      Collection.new manifest_text: bigmanifest
+    end
+    time_block 'create' do
+      c.save!
+    end
+    time_block 'read' do
+      Collection.find c.uuid
+    end
+    time_block 'read(cached)' do
+      Collection.find c.uuid
+    end
+    time_block 'list' do
+      list = Collection.select(['uuid', 'manifest_text']).filter [['uuid','=',c.uuid]]
+      assert_equal 1, list.count
+      assert_equal c.uuid, list.first.uuid
+      assert_not_nil list.first.manifest_text
+    end
+    time_block 'update(name-only)' do
+      manifest_text_length = c.manifest_text.length
+      c.update_attributes name: 'renamed during test case'
+      assert_equal c.manifest_text.length, manifest_text_length
+    end
+    time_block 'update' do
+      c.manifest_text += ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:empty.txt\n"
+      c.save!
+    end
+    time_block 'delete' do
+      c.destroy
+    end
+    time_block 'read(404)' do
+      assert_empty Collection.filter([['uuid','=',c.uuid]])
+    end
+  end
+end
diff --git a/apps/workbench/test/integration_performance/collections_controller_test.rb b/apps/workbench/test/integration_performance/collections_controller_test.rb
new file mode 100644 (file)
index 0000000..3b81c60
--- /dev/null
@@ -0,0 +1,71 @@
+require 'test_helper'
+require 'helpers/manifest_examples'
+require 'helpers/time_block'
+
+class Blob
+end
+
+class BigCollectionsControllerTest < ActionController::TestCase
+  include ManifestExamples
+
+  setup do
+    Blob.stubs(:sign_locator).returns 'd41d8cd98f00b204e9800998ecf8427e+0'
+  end
+
+  test "combine two big and two small collections" do
+    @controller = ActionsController.new
+    bigmanifest1 = time_block 'build example' do
+      make_manifest(streams: 100,
+                    files_per_stream: 100,
+                    blocks_per_file: 20,
+                    bytes_per_block: 0)
+    end
+    bigmanifest2 = bigmanifest1.gsub '.txt', '.txt2'
+    smallmanifest1 = ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:small1.txt\n"
+    smallmanifest2 = ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:small2.txt\n"
+    totalsize = bigmanifest1.length + bigmanifest2.length +
+      smallmanifest1.length + smallmanifest2.length
+    parts = time_block "create (total #{totalsize>>20}MiB)" do
+      use_token :active do
+        {
+          big1: Collection.create(manifest_text: bigmanifest1),
+          big2: Collection.create(manifest_text: bigmanifest2),
+          small1: Collection.create(manifest_text: smallmanifest1),
+          small2: Collection.create(manifest_text: smallmanifest2),
+        }
+      end
+    end
+    time_block 'combine' do
+      post :combine_selected_files_into_collection, {
+        selection: [parts[:big1].uuid,
+                    parts[:big2].uuid,
+                    parts[:small1].uuid + '/small1.txt',
+                    parts[:small2].uuid + '/small2.txt',
+                   ],
+        format: :html
+      }, session_for(:active)
+    end
+    assert_response :redirect
+  end
+
+  [:json, :html].each do |format|
+    test "show collection with big manifest (#{format})" do
+      bigmanifest = time_block 'build example' do
+        make_manifest(streams: 100,
+                      files_per_stream: 100,
+                      blocks_per_file: 20,
+                      bytes_per_block: 0)
+      end
+      @controller = CollectionsController.new
+      c = time_block "create (manifest size #{bigmanifest.length>>20}MiB)" do
+        use_token :active do
+          Collection.create(manifest_text: bigmanifest)
+        end
+      end
+      time_block 'show' do
+        get :show, {id: c.uuid, format: format}, session_for(:active)
+      end
+      assert_response :success
+    end
+  end
+end
diff --git a/apps/workbench/test/integration_performance/collections_perf_test.rb b/apps/workbench/test/integration_performance/collections_perf_test.rb
new file mode 100644 (file)
index 0000000..3adece1
--- /dev/null
@@ -0,0 +1,116 @@
+require 'integration_helper'
+
+# The tests in the "integration_performance" dir are not run by the regular
+#   build pipeline, since this is not one of the "standard" test directories.
+#
+# To run the tests in this directory, use the following command:
+# ./run-tests.sh WORKSPACE=~/arvados --only apps/workbench apps/workbench_test="TEST=test/integration_performance/*.rb"
+#
+
+class CollectionsPerfTest < ActionDispatch::IntegrationTest
+  setup do
+    Capybara.current_driver = :rack_test
+  end
+
+  def create_large_collection size, file_name_prefix
+    manifest_text = ". d41d8cd98f00b204e9800998ecf8427e+0"
+
+    i = 0
+    until manifest_text.length > size do
+      manifest_text << " 0:0:#{file_name_prefix}#{i.to_s}"
+      i += 1
+    end
+    manifest_text << "\n"
+
+    Rails.logger.info "Creating collection at #{Time.now.to_f}"
+    collection = Collection.create!(manifest_text: manifest_text)
+    Rails.logger.info "Done creating collection at #{Time.now.to_f}"
+
+    collection
+  end
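+  # For illustration (derived from the loop above, not part of this change):
+  # create_large_collection(40, 'f_') returns a collection whose manifest
+  # looks like ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:f_0 0:0:f_1 ...\n"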
+
+  [
+    1000000,
+    10000000,
+    20000000,
+  ].each do |size|
+    test "Create and show large collection with manifest text of #{size}" do
+      use_token :active
+      new_collection = create_large_collection size, 'collection_file_name_with_prefix_'
+
+      Rails.logger.info "Visiting collection at #{Time.now.to_f}"
+      visit page_with_token('active', "/collections/#{new_collection.uuid}")
+      Rails.logger.info "Done visiting collection at #{Time.now.to_f}"
+
+      assert_text new_collection.uuid
+      assert(page.has_link?('collection_file_name_with_prefix_0'), "Collection page did not include file link")
+    end
+  end
+
+  # This does not work with larger sizes because of need_javascript.
+  # Just use one test with 100,000 for now.
+  [
+    100000,
+  ].each do |size|
+    test "Create, show, and update description for large collection with manifest text of #{size}" do
+      need_javascript
+
+      use_token :active
+      new_collection = create_large_collection size, 'collection_file_name_with_prefix_'
+
+      Rails.logger.info "Visiting collection at #{Time.now.to_f}"
+      visit page_with_token('active', "/collections/#{new_collection.uuid}")
+      Rails.logger.info "Done visiting collection at #{Time.now.to_f}"
+
+      assert_text new_collection.uuid
+      assert(page.has_link?('collection_file_name_with_prefix_0'), "Collection page did not include file link")
+
+      # edit description
+      Rails.logger.info "Editing description at #{Time.now.to_f}"
+      within('.arv-description-as-subtitle') do
+        find('.fa-pencil').click
+        find('.editable-input textarea').set('description for this large collection')
+        find('.editable-submit').click
+      end
+      Rails.logger.info "Done editing description at #{Time.now.to_f}"
+
+      assert_text 'description for this large collection'
+    end
+  end
+
+  [
+    [1000000, 10000],
+    [10000000, 10000],
+    [20000000, 10000],
+  ].each do |size1, size2|
+    test "Create one large collection of #{size1} and one small collection of #{size2} and combine them" do
+      use_token :active
+      first_collection = create_large_collection size1, 'collection_file_name_with_prefix_1_'
+      second_collection = create_large_collection size2, 'collection_file_name_with_prefix_2_'
+
+      Rails.logger.info "Visiting collections page at #{Time.now.to_f}"
+      visit page_with_token('active', "/collections")
+      Rails.logger.info "Done visiting collections page at at #{Time.now.to_f}"
+
+      assert_text first_collection.uuid
+      assert_text second_collection.uuid
+
+      within('tr', text: first_collection['uuid']) do
+        find('input[type=checkbox]').click
+      end
+
+      within('tr', text: second_collection['uuid']) do
+        find('input[type=checkbox]').click
+      end
+
+      Rails.logger.info "Clicking on combine collections option at #{Time.now.to_f}"
+      click_button 'Selection...'
+      within('.selection-action-container') do
+        click_link 'Create new collection with selected collections'
+      end
+      Rails.logger.info "Done combining collections at #{Time.now.to_f}"
+
+      assert(page.has_link?('collection_file_name_with_prefix_1_0'), "Collection page did not include file link")
+    end
+  end
+end
index 843aa3aed4727e79e30e420a602bb80a46306587..d068ee2aaf08f3e7451ae817599e7dcaaca0d535 100644 (file)
@@ -34,6 +34,8 @@ class BrowsingTest < WorkbenchPerformanceTest
       end
     end
 
+    sleep(50)
+
     # In the search dialog now. Expect at least one item in the result display.
     within '.modal-content' do
       assert_text 'All projects'
index fdde55d41db63aced4559dc8d03409a58cc7246c..89d15c67d8de3708c4d74540518b9707e09aa432 100644 (file)
@@ -44,7 +44,7 @@ class ActiveSupport::TestCase
     end
   end
 
-  setup do
+  teardown do
     Thread.current[:arvados_api_token] = nil
     Thread.current[:user] = nil
     Thread.current[:reader_tokens] = nil
@@ -95,22 +95,40 @@ module ApiFixtureLoader
 end
 
 module ApiMockHelpers
-  def stub_api_calls_with_body body, status_code=200
+  def fake_api_response body, status_code, headers
     resp = mock
-    stubbed_client = ArvadosApiClient.new
-    stubbed_client.instance_eval do
-      resp.responds_like_instance_of HTTP::Message
-      resp.stubs(:content).returns body
-      resp.stubs(:status_code).returns status_code
+    resp.responds_like_instance_of HTTP::Message
+    resp.stubs(:headers).returns headers
+    resp.stubs(:content).returns body
+    resp.stubs(:status_code).returns status_code
+    resp
+  end
+
+  def stub_api_calls_with_body body, status_code=200, headers={}
+    stub_api_calls
+    resp = fake_api_response body, status_code, headers
+    stub_api_client.stubs(:post).returns resp
+  end
+
+  def stub_api_calls
+    @stubbed_client = ArvadosApiClient.new
+    @stubbed_client.instance_eval do
       @api_client = HTTPClient.new
-      @api_client.stubs(:post).returns resp
     end
-    ArvadosApiClient.stubs(:new_or_current).returns(stubbed_client)
+    ArvadosApiClient.stubs(:new_or_current).returns(@stubbed_client)
   end
 
   def stub_api_calls_with_invalid_json
     stub_api_calls_with_body ']"omg,bogus"['
   end
+
+  # Return the HTTPClient mock used by the ArvadosApiClient mock. You
+  # must have called stub_api_calls first.
+  def stub_api_client
+    @stubbed_client.instance_eval do
+      @api_client
+    end
+  end
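+
+  # Typical usage (a sketch; the new arvados_base_test.rb below shows real
+  # examples):
+  #   stub_api_calls
+  #   stub_api_client.stubs(:post).returns fake_api_response('{}', 200, {})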
 end
 
 class ActiveSupport::TestCase
@@ -137,7 +155,7 @@ class ApiServerForTests
   @main_process_pid = $$
   @@server_is_running = false
 
-  def check_call *args
+  def check_output *args
     output = nil
     Bundler.with_clean_env do
       output = IO.popen *args do |io|
@@ -153,7 +171,12 @@ class ApiServerForTests
   def run_test_server
     env_script = nil
     Dir.chdir PYTHON_TESTS_DIR do
-      env_script = check_call %w(python ./run_test_server.py start --auth admin)
+      # These are no-ops if we're running within run-tests.sh (except
+      # that we do get a useful env_script back from "start", even
+      # though it doesn't need to start up a new server).
+      env_script = check_output %w(python ./run_test_server.py start --auth admin)
+      check_output %w(python ./run_test_server.py start_arv-git-httpd)
+      check_output %w(python ./run_test_server.py start_nginx)
     end
     test_env = {}
     env_script.each_line do |line|
@@ -169,8 +192,10 @@ class ApiServerForTests
 
   def stop_test_server
     Dir.chdir PYTHON_TESTS_DIR do
-      # This is a no-op if we're running within run-tests.sh
-      check_call %w(python ./run_test_server.py stop)
+      # These are no-ops if we're running within run-tests.sh
+      check_output %w(python ./run_test_server.py stop_nginx)
+      check_output %w(python ./run_test_server.py stop_arv-git-httpd)
+      check_output %w(python ./run_test_server.py stop)
     end
     @@server_is_running = false
   end
@@ -196,7 +221,7 @@ class ApiServerForTests
 
   def run_rake_task task_name, arg_string
     Dir.chdir ARV_API_SERVER_DIR do
-      check_call ['bundle', 'exec', 'rake', "#{task_name}[#{arg_string}]"]
+      check_output ['bundle', 'exec', 'rake', "#{task_name}[#{arg_string}]"]
     end
   end
 end
@@ -270,12 +295,17 @@ class ActiveSupport::TestCase
   end
 
   def after_teardown
-    if self.class.want_reset_api_fixtures[:after_each_test]
+    if self.class.want_reset_api_fixtures[:after_each_test] and
+        @want_reset_api_fixtures != false
       self.class.reset_api_fixtures_now
     end
     super
   end
 
+  def reset_api_fixtures_after_test t=true
+    @want_reset_api_fixtures = t
+  end
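+  # (Tests that manage fixture state themselves can opt out per-test by
+  #  calling: reset_api_fixtures_after_test false)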
+
   protected
   def self.reset_api_fixtures_now
     # Never try to reset fixtures when we're just using test
diff --git a/apps/workbench/test/unit/arvados_base_test.rb b/apps/workbench/test/unit/arvados_base_test.rb
new file mode 100644 (file)
index 0000000..e9ac4f8
--- /dev/null
@@ -0,0 +1,87 @@
+require 'test_helper'
+
+class ArvadosBaseTest < ActiveSupport::TestCase
+  test '#save does not send unchanged string attributes' do
+    use_token :active do
+      fixture = api_fixture("collections")["foo_collection_in_aproject"]
+      c = Collection.find(fixture['uuid'])
+
+      new_name = 'name changed during test'
+
+      got_query = nil
+      stub_api_calls
+      stub_api_client.expects(:post).with do |url, query, opts={}|
+        got_query = query
+        true
+      end.returns fake_api_response('{}', 200, {})
+      c.name = new_name
+      c.save
+
+      updates = JSON.parse got_query['collection']
+      assert_equal updates['name'], new_name
+      refute_includes updates, 'description'
+      refute_includes updates, 'manifest_text'
+    end
+  end
+
+  test '#save does not send unchanged attributes missing because of select' do
+    use_token :active do
+      fixture = api_fixture("collections")["foo_collection_in_aproject"]
+      c = Collection.
+        filter([['uuid','=',fixture['uuid']]]).
+        select(['uuid']).
+        first
+      if 'MissingAttribute check is re-enabled' == true
+        assert_raises ActiveModel::MissingAttributeError do
+          c.properties
+        end
+      else
+        assert_equal({}, c.properties)
+      end
+
+      got_query = nil
+      stub_api_calls
+      stub_api_client.expects(:post).with do |url, query, opts={}|
+        got_query = query
+        true
+      end.returns fake_api_response('{}', 200, {})
+      c.name = 'foo'
+      c.save
+
+      updates = JSON.parse got_query['collection']
+      assert_includes updates, 'name'
+      refute_includes updates, 'description'
+      refute_includes updates, 'properties'
+    end
+  end
+
+  [false,
+   {},
+   {'foo' => 'bar'},
+  ].each do |init_props|
+    test "#save sends serialized attributes if changed from #{init_props}" do
+      use_token :active do
+        fixture = api_fixture("collections")["foo_collection_in_aproject"]
+        c = Collection.find(fixture['uuid'])
+
+        if init_props
+          c.properties = init_props if init_props
+          c.save!
+        end
+
+        got_query = nil
+        stub_api_calls
+        stub_api_client.expects(:post).with do |url, query, opts={}|
+          got_query = query
+          true
+        end.returns fake_api_response('{"etag":"fake","uuid":"fake"}', 200, {})
+
+        c.properties['baz'] = 'qux'
+        c.save!
+
+        updates = JSON.parse got_query['collection']
+        assert_includes updates, 'properties'
+      end
+    end
+  end
+end
index 4cad6e64b604b06858267055a60f81a25d13c096..747cfc12e5f6b4b70d4e586cbb7bd17c824dd070 100644 (file)
@@ -1,9 +1,13 @@
 require 'test_helper'
 
 class PipelineInstanceTest < ActiveSupport::TestCase
-  def attribute_editable_for?(token_name, pi_name, attr_name, ever=nil)
+  def find_pi_with(token_name, pi_name)
     use_token token_name
-    find_fixture(PipelineInstance, pi_name).attribute_editable?(attr_name, ever)
+    find_fixture(PipelineInstance, pi_name)
+  end
+
+  def attribute_editable_for?(token_name, pi_name, attr_name, ever=nil)
+    find_pi_with(token_name, pi_name).attribute_editable?(attr_name, ever)
   end
 
   test "admin can edit name" do
@@ -46,4 +50,62 @@ class PipelineInstanceTest < ActiveSupport::TestCase
                                    "components"),
            "components not editable on new pipeline")
   end
+
+  test "job_logs for partially complete pipeline" do
+    log_uuid = api_fixture("collections", "real_log_collection", "uuid")
+    pi = find_pi_with(:active, "running_pipeline_with_complete_job")
+    assert_equal({previous: log_uuid, running: nil}, pi.job_log_ids)
+  end
+
+  test "job_logs for complete pipeline" do
+    log_uuid = api_fixture("collections", "real_log_collection", "uuid")
+    pi = find_pi_with(:active, "complete_pipeline_with_two_jobs")
+    assert_equal({ancient: log_uuid, previous: log_uuid}, pi.job_log_ids)
+  end
+
+  test "job_logs for malformed pipeline" do
+    pi = find_pi_with(:active, "components_is_jobspec")
+    assert_empty(pi.job_log_ids.select { |_, log| not log.nil? })
+  end
+
+  def check_stderr_logs(token_name, pi_name, log_name)
+    pi = find_pi_with(token_name, pi_name)
+    actual_logs = pi.stderr_log_lines
+    expected_text = api_fixture("logs", log_name, "properties", "text")
+    expected_text.each_line do |log_line|
+      assert_includes(actual_logs, log_line.chomp)
+    end
+  end
+
+  test "stderr_logs for running pipeline" do
+    check_stderr_logs(:active,
+                      "pipeline_in_publicly_accessible_project",
+                      "log_line_for_pipeline_in_publicly_accessible_project")
+  end
+
+  test "stderr_logs for job in complete pipeline" do
+    check_stderr_logs(:active,
+                      "failed_pipeline_with_two_jobs",
+                      "crunchstat_for_previous_job")
+  end
+
+  test "has_readable_logs? for unrun pipeline" do
+    pi = find_pi_with(:active, "new_pipeline")
+    refute(pi.has_readable_logs?)
+  end
+
+  test "has_readable_logs? for running pipeline" do
+    pi = find_pi_with(:active, "running_pipeline_with_complete_job")
+    assert(pi.has_readable_logs?)
+  end
+
+  test "has_readable_logs? for complete pipeline" do
+    pi = find_pi_with(:active, "pipeline_in_publicly_accessible_project_but_other_objects_elsewhere")
+    assert(pi.has_readable_logs?)
+  end
+
+  test "has_readable_logs? for complete pipeline when jobs unreadable" do
+    pi = find_pi_with(:anonymous, "pipeline_in_publicly_accessible_project_but_other_objects_elsewhere")
+    refute(pi.has_readable_logs?)
+  end
 end
diff --git a/backports/python-ciso8601/fpm-info.sh b/backports/python-ciso8601/fpm-info.sh
new file mode 100644 (file)
index 0000000..925a375
--- /dev/null
@@ -0,0 +1,8 @@
+case "$TARGET" in
+    centos*)
+        fpm_depends+=(glibc)
+        ;;
+    debian* | ubuntu*)
+        fpm_depends+=(libc6)
+        ;;
+esac
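+
+# (Assumed context, not shown in this patch: fpm-info.sh files are sourced by
+#  the package build scripts, which turn fpm_depends into fpm arguments,
+#  roughly:
+#    for dep in "${fpm_depends[@]}"; do fpm_args+=(--depends "$dep"); done)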
diff --git a/backports/python-llfuse/fpm-info.sh b/backports/python-llfuse/fpm-info.sh
new file mode 100644 (file)
index 0000000..c578365
--- /dev/null
@@ -0,0 +1,10 @@
+case "$TARGET" in
+    centos*)
+        build_depends+=('fuse-devel')
+        fpm_depends+=(glibc fuse-libs)
+        ;;
+    debian* | ubuntu*)
+        build_depends+=('libfuse-dev')
+        fpm_depends+=(libc6 libfuse2)
+        ;;
+esac
diff --git a/backports/python-pycrypto/fpm-info.sh b/backports/python-pycrypto/fpm-info.sh
new file mode 100644 (file)
index 0000000..cea7096
--- /dev/null
@@ -0,0 +1,11 @@
+case "$TARGET" in
+    centos*)
+        fpm_depends+=(glibc)
+        ;;
+    debian8)
+        fpm_depends+=(libc6 libgmp10)
+        ;;
+    debian* | ubuntu*)
+        fpm_depends+=(libc6)
+        ;;
+esac
diff --git a/backports/python-pycurl/fpm-info.sh b/backports/python-pycurl/fpm-info.sh
new file mode 100644 (file)
index 0000000..4d2860f
--- /dev/null
@@ -0,0 +1,132 @@
+case "$TARGET" in
+    centos6)
+            fpm_depends+=(
+                cyrus-sasl-lib
+                glibc
+                keyutils-libs
+                krb5-libs
+                libcom_err
+                libcurl
+                libidn
+                libselinux
+                libssh2
+                nspr
+                nss
+                nss-softokn-freebl
+                nss-util
+                openldap
+                openssl
+                zlib
+            ) ;;
+    debian7)
+            fpm_depends+=(
+                libc6
+                libcomerr2
+                libcurl3-gnutls
+                libgcrypt11
+                libgnutls26
+                libgpg-error0
+                libgssapi-krb5-2
+                libidn11
+                libk5crypto3
+                libkeyutils1
+                libkrb5-3
+                libkrb5support0
+                libldap-2.4-2
+                libp11-kit0
+                librtmp0
+                libsasl2-2
+                libssh2-1
+                libtasn1-3
+                zlib1g
+            ) ;;
+    debian8)
+            fpm_depends+=(
+                libc6
+                libcomerr2
+                libcurl3-gnutls
+                libffi6
+                libgcrypt20
+                libgmp10
+                libgnutls-deb0-28
+                libgpg-error0
+                libgssapi-krb5-2
+                libhogweed2
+                libidn11
+                libk5crypto3
+                libkeyutils1
+                libkrb5-3
+                libkrb5support0
+                libldap-2.4-2
+                libnettle4
+                libp11-kit0
+                librtmp1
+                libsasl2-2
+                libssh2-1
+                libtasn1-6
+                zlib1g
+            ) ;;
+    ubuntu1204)
+            fpm_depends+=(
+                libasn1-8-heimdal
+                libc6
+                libcomerr2
+                libcurl3-gnutls
+                libgcrypt11
+                libgnutls26
+                libgpg-error0
+                libgssapi-krb5-2
+                libgssapi3-heimdal
+                libhcrypto4-heimdal
+                libheimbase1-heimdal
+                libheimntlm0-heimdal
+                libhx509-5-heimdal
+                libidn11
+                libk5crypto3
+                libkeyutils1
+                libkrb5-26-heimdal
+                libkrb5-3
+                libkrb5support0
+                libldap-2.4-2
+                libp11-kit0
+                libroken18-heimdal
+                librtmp0
+                libsasl2-2
+                libsqlite3-0
+                libtasn1-3
+                libwind0-heimdal
+                zlib1g
+            ) ;;
+    ubuntu1404)
+            fpm_depends+=(
+                libasn1-8-heimdal
+                libc6
+                libcomerr2
+                libcurl3-gnutls
+                libffi6
+                libgcrypt11
+                libgnutls26
+                libgpg-error0
+                libgssapi-krb5-2
+                libgssapi3-heimdal
+                libhcrypto4-heimdal
+                libheimbase1-heimdal
+                libheimntlm0-heimdal
+                libhx509-5-heimdal
+                libidn11
+                libk5crypto3
+                libkeyutils1
+                libkrb5-26-heimdal
+                libkrb5-3
+                libkrb5support0
+                libldap-2.4-2
+                libp11-kit0
+                libroken18-heimdal
+                librtmp0
+                libsasl2-2
+                libsqlite3-0
+                libtasn1-6
+                libwind0-heimdal
+                zlib1g
+            ) ;;
+esac
index 3d54c9c2b32c1fc05c2ce536a5ece807df59d49b..0ae1c4620995014f61d17379bca756d2415f6e4c 100644 (file)
@@ -1,7 +1,8 @@
 import arvados
 import os
-import robust_put
 import stat
+import arvados.commands.run
+import logging
 
 # Implements "Virtual Working Directory"
 # Provides a way of emulating a shared writable directory in Keep based
@@ -32,23 +33,71 @@ def checkout(source_collection, target_dir, keepmount=None):
         for f in files:
             os.symlink(os.path.join(root, f), os.path.join(target_dir, rel, f))
 
-# Delete all symlinks and check in any remaining normal files.
-# If merge == True, merge the manifest with source_collection and return a
-# CollectionReader for the combined collection.
-def checkin(source_collection, target_dir, merge=True):
-    # delete symlinks, commit directory, merge manifests and return combined
-    # collection.
+def checkin(target_dir):
+    """Write files in `target_dir` to Keep.
+
+    Regular files or symlinks to files outside the keep mount are written to
+    Keep as normal files (Keep does not support symlinks).
+
+    Symlinks to files in the keep mount will result in files in the new
+    collection which reference existing Keep blocks, no data copying necessary.
+
+    Returns a new Collection object, with data flushed but the collection record
+    not saved to the API.
+
+    """
+
+    outputcollection = arvados.collection.Collection(num_retries=5)
+
+    if target_dir[-1:] != '/':
+        target_dir += '/'
+
+    collections = {}
+
+    logger = logging.getLogger("arvados")
+
+    last_error = None
     for root, dirs, files in os.walk(target_dir):
         for f in files:
-            s = os.lstat(os.path.join(root, f))
-            if stat.S_ISLNK(s.st_mode):
-                os.unlink(os.path.join(root, f))
-
-    uuid = robust_put.upload(target_dir)
-    if merge:
-        cr1 = arvados.CollectionReader(source_collection)
-        cr2 = arvados.CollectionReader(uuid)
-        combined = arvados.CollectionReader(cr1.manifest_text() + cr2.manifest_text())
-        return combined
-    else:
-        return arvados.CollectionReader(uuid)
+            try:
+                s = os.lstat(os.path.join(root, f))
+
+                writeIt = False
+
+                if stat.S_ISREG(s.st_mode):
+                    writeIt = True
+                elif stat.S_ISLNK(s.st_mode):
+                    # 1. check if it is a link into a collection
+                    real = os.path.split(os.path.realpath(os.path.join(root, f)))
+                    (pdh, branch) = arvados.commands.run.is_in_collection(real[0], real[1])
+                    if pdh is not None:
+                        # 2. load collection
+                        if pdh not in collections:
+                            # 2.1 make sure it is flushed (see #5787 note 11)
+                            fd = os.open(real[0], os.O_RDONLY)
+                            os.fsync(fd)
+                            os.close(fd)
+
+                            # 2.2 get collection from API server
+                            collections[pdh] = arvados.collection.CollectionReader(pdh,
+                                                                                   api_client=outputcollection._my_api(),
+                                                                                   keep_client=outputcollection._my_keep(),
+                                                                                   num_retries=5)
+                        # 3. copy arvfile to new collection
+                        outputcollection.copy(branch, os.path.join(root[len(target_dir):], f), source_collection=collections[pdh])
+                    else:
+                        writeIt = True
+
+                if writeIt:
+                    reldir = root[len(target_dir):]
+                    with outputcollection.open(os.path.join(reldir, f), "wb") as writer:
+                        with open(os.path.join(root, f), "rb") as reader:
+                            dat = reader.read(64*1024)
+                            while dat:
+                                writer.write(dat)
+                                dat = reader.read(64*1024)
+            except (IOError, OSError) as e:
+                logger.error(e)
+                last_error = e
+
+    return (outputcollection, last_error)
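
As a usage sketch (not part of the patch), a caller of the new checkin() signature might look like this; the output directory path is an assumption:

    import vwd

    # checkin() now returns (Collection, last_error) instead of a
    # CollectionReader, so callers inspect the error before trusting
    # the output manifest.
    (out, err) = vwd.checkin("/tmp/task-output")
    if err is not None:
        raise SystemExit("writing output to Keep failed: %s" % err)
    print(out.manifest_text())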
index c07debd787eecfc6696c5d614d69c013f8028dd7..a6c5ef981ce35fb5e52f8bab9c6628c803e45872 100755 (executable)
@@ -57,12 +57,10 @@ else:
     jobp = json.loads(args.script_parameters)
     os.environ['JOB_UUID'] = 'zzzzz-8i9sb-1234567890abcde'
     os.environ['TASK_UUID'] = 'zzzzz-ot0gb-1234567890abcde'
-    os.environ['CRUNCH_SRC'] = '/tmp/crunche-src'
+    os.environ['CRUNCH_SRC'] = '/tmp/crunch-src'
     if 'TASK_KEEPMOUNT' not in os.environ:
         os.environ['TASK_KEEPMOUNT'] = '/keep'
 
-links = []
-
 def sub_tmpdir(v):
     return os.path.join(arvados.current_task().tmpdir, 'tmpdir')
 
@@ -333,6 +331,13 @@ try:
         if not args.dry_run:
             stdoutfile = open(stdoutname, "wb")
 
+    if "task.env" in taskp:
+        env = copy.copy(os.environ)
+        for k,v in taskp["task.env"].items():
+            env[k] = subst.do_substitution(taskp, v)
+    else:
+        env = None
+
     logger.info("{}{}{}".format(' | '.join([' '.join(c) for c in cmd]), (" < " + stdinname) if stdinname is not None else "", (" > " + stdoutname) if stdoutname is not None else ""))
 
     if args.dry_run:
@@ -365,7 +370,7 @@ try:
             # this is an intermediate command in the pipeline, so its stdout should go to a pipe
             next_stdout = subprocess.PIPE
 
-        sp = subprocess.Popen(cmd[i], shell=False, stdin=next_stdin, stdout=next_stdout)
+        sp = subprocess.Popen(cmd[i], shell=False, stdin=next_stdin, stdout=next_stdout, env=env)
 
         # Need to close the FDs on our side so that subcommands will get SIGPIPE if the
         # consuming process ends prematurely.
@@ -415,31 +420,27 @@ signal.signal(signal.SIGINT, signal.SIG_DFL)
 signal.signal(signal.SIGTERM, signal.SIG_DFL)
 signal.signal(signal.SIGQUIT, signal.SIG_DFL)
 
-for l in links:
-    os.unlink(l)
-
 logger.info("the following output files will be saved to keep:")
 
-subprocess.call(["find", ".", "-type", "f", "-printf", "run-command: %12.12s %h/%f\\n"], stdout=sys.stderr)
+subprocess.call(["find", "-L", ".", "-type", "f", "-printf", "run-command: %12.12s %h/%f\\n"], stdout=sys.stderr, cwd=outdir)
 
 logger.info("start writing output to keep")
 
-if "task.vwd" in taskp:
-    if "task.foreach" in jobp:
-        # This is a subtask, so don't merge with the original collection, that will happen at the end
-        outcollection = vwd.checkin(subst.do_substitution(taskp, taskp["task.vwd"]), outdir, merge=False).manifest_text()
-    else:
-        # Just a single task, so do merge with the original collection
-        outcollection = vwd.checkin(subst.do_substitution(taskp, taskp["task.vwd"]), outdir, merge=True).manifest_text()
-else:
-    outcollection = robust_put.upload(outdir, logger)
+if "task.vwd" in taskp and "task.foreach" in jobp:
+    for root, dirs, files in os.walk(outdir):
+        for f in files:
+            s = os.lstat(os.path.join(root, f))
+            if stat.S_ISLNK(s.st_mode):
+                os.unlink(os.path.join(root, f))
+
+(outcollection, checkin_error) = vwd.checkin(outdir)
 
 # Success if we ran any subprocess, and they all exited 0.
-success = rcode and all(status == 0 for status in rcode.itervalues())
+success = rcode and all(status == 0 for status in rcode.itervalues()) and not checkin_error
 
 api.job_tasks().update(uuid=arvados.current_task()['uuid'],
                                      body={
-                                         'output': outcollection,
+                                         'output': outcollection.manifest_text(),
                                          'success': success,
                                          'progress':1.0
                                      }).execute()
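
For illustration (not part of the patch), a job exercising the new "task.env" support might carry script parameters like the following; the variable names and command are hypothetical:

    taskp = {
        "command": ["bash", "-c", "echo $GREETING; ls $SCRATCH"],
        # Each value passes through subst.do_substitution(), so markers
        # such as $(task.tmpdir) are expanded before the subprocess runs.
        "task.env": {
            "GREETING": "hello from run-command",
            "SCRATCH": "$(task.tmpdir)"
        }
    }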
index c1b79836ea04aaa9060fd52dfd20cccd6b836f6f..1bdd2ab4461c244b4ac2a2ee5eb725694c67ad62 100644 (file)
@@ -8,20 +8,21 @@
 
 baseurl:
 arvados_api_host: localhost
-arvados_workbench_host: localhost
+arvados_workbench_host: http://localhost
 
 exclude: ["Rakefile", "tmp", "vendor"]
 
 navbar:
-  start:
-    - Getting Started:
-      - start/index.html.textile.liquid
-    - Quickstart:
-      - start/getting_started/firstpipeline.html.textile.liquid
-    - Common Use Cases:
-      - start/getting_started/sharedata.html.textile.liquid
-    - Next Steps:
-      - start/getting_started/nextsteps.html.textile.liquid
+  #start:
+    #- Getting Started:
+      #- start/index.html.textile.liquid
+    #- Quickstart:
+      #- start/getting_started/publicproject.html.textile.liquid
+      #- start/getting_started/firstpipeline.html.textile.liquid
+    #- Common Use Cases:
+      #- start/getting_started/sharedata.html.textile.liquid
+    #- Next Steps:
+      #- start/getting_started/nextsteps.html.textile.liquid
 
   userguide:
     - Welcome:
@@ -31,6 +32,7 @@ navbar:
       - user/getting_started/workbench.html.textile.liquid
       - user/tutorials/tutorial-pipeline-workbench.html.textile.liquid
     - Access an Arvados virtual machine:
+      - user/getting_started/vm-login-with-webshell.html.textile.liquid
       - user/getting_started/ssh-access-unix.html.textile.liquid
       - user/getting_started/ssh-access-windows.html.textile.liquid
       - user/getting_started/check-environment.html.textile.liquid
@@ -40,9 +42,13 @@ navbar:
       - user/tutorials/tutorial-keep-get.html.textile.liquid
       - user/tutorials/tutorial-keep-mount.html.textile.liquid
       - user/topics/keep.html.textile.liquid
+      - user/topics/arv-copy.html.textile.liquid
     - Run a pipeline on the command line:
       - user/topics/running-pipeline-command-line.html.textile.liquid
       - user/topics/arv-run.html.textile.liquid
+    - Working with Arvados Repositories:
+      - user/tutorials/add-new-repository.html.textile.liquid
+      - user/tutorials/git-arvados-guide.html.textile.liquid
     - Develop a new pipeline:
       - user/tutorials/intro-crunch.html.textile.liquid
       - user/tutorials/running-external-program.html.textile.liquid
@@ -138,19 +144,18 @@ navbar:
   installguide:
     - Overview:
       - install/index.html.textile.liquid
-    - Docker:
-      - install/pre-built-docker.html.textile.liquid
-      - install/install-docker.html.textile.liquid
     - Manual installation:
       - install/install-manual-prerequisites.html.textile.liquid
+      - install/install-sso.html.textile.liquid
       - install/install-api-server.html.textile.liquid
+      - install/install-arv-git-httpd.html.textile.liquid
       - install/install-workbench-app.html.textile.liquid
       - install/install-shell-server.html.textile.liquid
       - install/create-standard-objects.html.textile.liquid
       - install/install-keepstore.html.textile.liquid
       - install/install-keepproxy.html.textile.liquid
       - install/install-crunch-dispatch.html.textile.liquid
+      - install/install-compute-node.html.textile.liquid
+    - Helpful hints:
+      - install/copy_pipeline_from_curoverse.html.textile.liquid
       - install/cheat_sheet.html.textile.liquid
-    - Software prerequisites:
-      - install/install-manual-prerequisites-ruby.html.textile.liquid
-      - install/install-sso.html.textile.liquid
diff --git a/doc/_includes/_arv_copy_expectations.liquid b/doc/_includes/_arv_copy_expectations.liquid
new file mode 100644 (file)
index 0000000..a76c9e7
--- /dev/null
@@ -0,0 +1,6 @@
+{% include 'notebox_begin' %}
+As stated above, arv-copy is recursive by default and requires a working git repository in the destination cluster. If you do not have a repository created, you can follow the instructions on the "Adding a new repository":{{site.baseurl}}/user/tutorials/add-new-repository.html page. We will use the *tutorial* repository created there as the example.
+
+<br/>In addition, arv-copy requires git when copying to a git repository. Please make sure that git is installed and available.
+
+{% include 'notebox_end' %}
index aa633666f593b8129ff8c96c8ebddbea6a294f95..c63c2d93345f0e822a073211875a42ed7b34acb8 100644 (file)
@@ -1,5 +1,7 @@
 <notextile>
 <pre>
+$ <span class="userinput">cd ~/keep/by_id/3229739b505d2b878b62aed09895a55a+142</span>
+$ <span class="userinput">ls *.fastq</span>
 $ <span class="userinput">arv-run grep -H -n ATTGGAGGAAAGATGAGTGAC \< *.fastq \> output.txt</span>
 [...]
  1 stderr run-command: grep -H -n ATTGGAGGAAAGATGAGTGAC < /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq > output.txt
diff --git a/doc/_includes/_install_debian_key.liquid b/doc/_includes/_install_debian_key.liquid
new file mode 100644 (file)
index 0000000..2b4793c
--- /dev/null
@@ -0,0 +1,4 @@
+<notextile>
+<pre><code>~$ <span class="userinput">sudo /usr/bin/apt-key adv --keyserver pool.sks-keyservers.net --recv 1078ECD7</span>
+</code></pre>
+</notextile>
diff --git a/doc/_includes/_install_git.liquid b/doc/_includes/_install_git.liquid
new file mode 100644 (file)
index 0000000..2ca1ce4
--- /dev/null
@@ -0,0 +1,3 @@
+{% include 'notebox_begin' %}
+Arvados requires git version 1.7.10 or later. If you are using an earlier version, please upgrade git before continuing.
+{% include 'notebox_end' %}
diff --git a/doc/_includes/_install_git_curl.liquid b/doc/_includes/_install_git_curl.liquid
new file mode 100644 (file)
index 0000000..edf07a7
--- /dev/null
@@ -0,0 +1,13 @@
+On a Debian-based system, install the following packages:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install git curl</span>
+</code></pre>
+</notextile>
+
+On a Red Hat-based system, install the following packages:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo yum install git curl</span>
+</code></pre>
+</notextile>
diff --git a/doc/_includes/_install_postgres.liquid b/doc/_includes/_install_postgres.liquid
new file mode 100644 (file)
index 0000000..82c2c2d
--- /dev/null
@@ -0,0 +1,22 @@
+On a Debian-based system, install the following packages:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install libpq-dev postgresql</span>
+</code></pre>
+</notextile>
+
+On a Red Hat-based system, install the following packages:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo yum install postgresql postgresql-devel</span>
+</code></pre>
+</notextile>
+
+{% include 'notebox_begin' %}
+
+If you intend to use specific versions of these packages from Software Collections, you may have to adapt some of the package names to match. For example:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo yum install postgresql92 postgresql92-postgresql-devel</span></code></pre></notextile>
+
+{% include 'notebox_end' %}
diff --git a/doc/_includes/_install_redhat_key.liquid b/doc/_includes/_install_redhat_key.liquid
new file mode 100644 (file)
index 0000000..9fd219b
--- /dev/null
@@ -0,0 +1,6 @@
+<notextile>
+<pre><code>~$ <span class="userinput">gpg --keyserver pool.sks-keyservers.net --recv-keys 1078ECD7</span>
+~$ <span class="userinput">gpg --armor --export 1078ECD7 >/tmp/curoverse.key</span>
+~$ <span class="userinput">sudo rpm --import /tmp/curoverse.key</span>
+</code></pre>
+</notextile>
diff --git a/doc/_includes/_install_ruby_and_bundler.liquid b/doc/_includes/_install_ruby_and_bundler.liquid
new file mode 100644 (file)
index 0000000..369bf46
--- /dev/null
@@ -0,0 +1,64 @@
+Currently, only Ruby 2.1 is supported.
+
+h4(#rvm). *Option 1: Install with RVM*
+
+<notextile>
+<pre><code><span class="userinput">sudo gpg --keyserver hkp://keys.gnupg.net --recv-keys 409B6B1796C275462A1703113804BB82D39DC0E3
+\curl -sSL https://get.rvm.io | sudo bash -s stable --ruby=2.1
+sudo adduser "$USER" rvm
+</span></code></pre></notextile>
+
+Either log out and log back in to activate RVM, or explicitly load it in all open shells like this:
+
+<notextile>
+<pre><code><span class="userinput">source /usr/local/rvm/scripts/rvm
+</span></code></pre></notextile>
+
+Once RVM is activated in your shell, install Bundler:
+
+<notextile>
+<pre><code>~$ <span class="userinput">gem install bundler</span>
+</code></pre></notextile>
+
+h4(#fromsource). *Option 2: Install from source*
+
+Install prerequisites for Debian 7 or 8:
+
+<notextile>
+<pre><code><span class="userinput">sudo apt-get install \
+    bison build-essential gettext libcurl3 libcurl3-gnutls \
+    libcurl4-openssl-dev libpcre3-dev libreadline-dev \
+    libssl-dev libxslt1.1 zlib1g-dev
+</span></code></pre></notextile>
+
+Install prerequisites for CentOS 6:
+
+<notextile>
+<pre><code><span class="userinput">sudo yum install \
+    libyaml-devel glibc-headers autoconf gcc-c++ glibc-devel \
+    patch readline-devel zlib-devel libffi-devel openssl-devel \
+    automake libtool bison sqlite-devel
+</span></code></pre></notextile>
+
+Install prerequisites for Ubuntu 12.04 or 14.04:
+
+<notextile>
+<pre><code><span class="userinput">sudo apt-get install \
+    gawk g++ gcc make libc6-dev libreadline6-dev zlib1g-dev libssl-dev \
+    libyaml-dev libsqlite3-dev sqlite3 autoconf libgdbm-dev \
+    libncurses5-dev automake libtool bison pkg-config libffi-dev
+</span></code></pre></notextile>
+
+Build and install Ruby:
+
+<notextile>
+<pre><code><span class="userinput">mkdir -p ~/src
+cd ~/src
+curl http://cache.ruby-lang.org/pub/ruby/2.1/ruby-2.1.6.tar.gz | tar xz
+cd ruby-2.1.6
+./configure --disable-install-rdoc
+make
+sudo make install
+
+sudo gem install bundler</span>
+</code></pre></notextile>
index 3e23e6d4e84d543e331964c43263bc3dc966e2ea..6d88b2b4768c0479cf8e77b9442f4f52da16427a 100644 (file)
@@ -7,16 +7,16 @@
         <span class="icon-bar"></span>
         <span class="icon-bar"></span>
       </button>
-      <a class="navbar-brand" href="{{ site.baseurl }}/">Arvados</a>
+      <a class="navbar-brand" href="{{ site.baseurl }}/">Arvados Docs</a>
     </div>
     <div class="collapse navbar-collapse" id="bs-navbar-collapse">
       <ul class="nav navbar-nav">
-        <li {% if page.navsection == 'start' %} class="active" {% endif %}><a href="{{ site.baseurl }}/start/index.html">Getting&nbsp;Started</a></li>
+        <!--<li {% if page.navsection == 'start' %} class="active" {% endif %}><a href="{{ site.baseurl }}/start/index.html">Getting&nbsp;Started</a></li>-->
         <li {% if page.navsection == 'userguide' %} class="active" {% endif %}><a href="{{ site.baseurl }}/user/index.html">User&nbsp;Guide</a></li>
         <li {% if page.navsection == 'sdk' %} class="active" {% endif %}><a href="{{ site.baseurl }}/sdk/index.html">SDKs</a></li>
         <li {% if page.navsection == 'api' %} class="active" {% endif %}><a href="{{ site.baseurl }}/api/index.html">API</a></li>
         <li {% if page.navsection == 'installguide' %} class="active" {% endif %}><a href="{{ site.baseurl }}/install/index.html">Install</a></li>
-        <li><a href="https://arvados.org/projects/arvados/" style="padding-left: 2em">Developer Site&nbsp;&raquo;</a></li>
+        <li><a href="https://arvados.org/projects/arvados/" style="padding-left: 2em">arvados.org&nbsp;&raquo;</a></li>
       </ul>
 
       <div class="pull-right" style="padding-top: 6px">
diff --git a/doc/_includes/_note_python27_sc.liquid b/doc/_includes/_note_python27_sc.liquid
new file mode 100644 (file)
index 0000000..7aad695
--- /dev/null
@@ -0,0 +1,5 @@
+{% include 'notebox_begin' %}
+
+On older Red Hat-based systems, these packages require the "python27 Software Collection":https://www.softwarecollections.org/en/scls/rhscl/python27/.
+
+{% include 'notebox_end' %}
index 3770635777f9de7e5707e76d694361a42e879531..098767300fede09944adce305ef017739ebf3b76 100644 (file)
@@ -5,22 +5,13 @@ h1(#workbench). Adding your key to Arvados Workbench
 
 h3. From the Workbench dashboard
 
-If you have no SSH keys registered, there should be a notification asking you to provide your SSH public key.  In the Workbench top navigation menu, look for a dropdown menu with your email address in upper right corner. It will have an icon such as <span class="badge badge-alert">1</span> (the number indicates there are new notifications).  Click on this icon and a dropdown menu should appear with a message asking you to add your public key.  Paste your public key into the text area provided and click on the check button to submit the key.  You are now ready to "log into an Arvados VM":#login.
-
-h3. Alternate way to add SSH keys
-
-Click on the link with your _email address_ in the upper right corner to access the user settings menu, and click on the menu item *Manage account* to go to the account management page.
-
-On the *Manage account* page, click on the button <span class="btn btn-primary">*+* Add new SSH key</span> button in the upper right corner of the page in the SSH Keys panel.
-
-This will open a popup as shown in this screenshot:
+In the Workbench top navigation menu, click on the dropdown menu icon <span class="fa fa-lg fa-user"></span> <span class="caret"></span> to access the user settings menu, then click on the menu item *SSH keys* to go to the *SSH keys* page. Click on the <span class="btn btn-primary">*+* Add new SSH key</span> button on this page. This will open a popup as shown in this screenshot:
 
 !{{ site.baseurl }}/images/ssh-adding-public-key.png!
-
-Paste the public key that you copied to the cliboard in the previous section into the popup text box labeled *Public Key* and click on the <span class="btn btn-primary">Submit</span> button to save it. This should refresh the Manage account page with the fingerprint of the public key that you just added in the SSH Keys panel.  You are now ready to "log into an Arvados VM":#login.
+Paste your public key into the text area labeled *Public Key*, and click on the <span class="btn btn-primary">Submit</span> button. You are now ready to "log into an Arvados VM":#login.
 
 h1(#login). Using SSH to log into an Arvados VM
 
-To see a list of virtual machines that you have access to and determine the name and login information, click on the link with your _email address_ in the upper right corner and click on the menu item *Manage account* to go to the account management page. On this page, you will see a *Virtual Machines* panel, which lists the virtual machines you can access. The *hostname* column lists the name of each available VM.  The *logins* column will have a list of comma separated values of the form @you@. In this guide the hostname will be *_shell_* and the login will be *_you_*.  Replace these with your hostname and login name as appropriate.
+To see the virtual machines you can access and determine their names and login information, click on the dropdown menu icon <span class="fa fa-lg fa-user"></span> <span class="caret"></span> in the upper right corner of the top navigation menu to access the user settings menu, then click on the menu item *Virtual machines* to go to the Virtual machines page. This page lists the virtual machines you can access. The *hostname* column lists the name of each available VM.  The *logins* column contains a comma-separated list of login names of the form @you@. In this guide the hostname will be *_shell_* and the login will be *_you_*.  Replace these with your hostname and login name as appropriate.
 
 
index c2be9e281a4c22f65a53c8be6bf9e8aaacbaf22a..413e51f9c057565647f829506506b4fd58aa7c8d 100644 (file)
@@ -7,7 +7,7 @@
             "repository": "arvados",
             "script_parameters": {
                 "command": [
-                    "bwa",
+                    "$(dir $(bwa_collection))/bwa",
                     "mem",
                     "-t",
                     "$(node.cores)",
                     "required": true,
                     "dataclass": "Collection"
                 },
+                "bwa_collection": {
+                    "required": true,
+                    "dataclass": "Collection",
+                    "default": "39c6f22d40001074f4200a72559ae7eb+5745"
+                },
                 "sample": {
                     "required": true,
                     "dataclass": "Collection"
                 },
-                "stdout": "$(basename $(glob $(dir $(sample))/*_1.fastq)).sam"
+                "task.stdout": "$(basename $(glob $(dir $(sample))/*_1.fastq)).sam"
             },
             "runtime_constraints": {
-                "docker_image": "arvados/jobs-java-bwa-samtools"
+                "docker_image": "bcosc/arv-base-java",
+                "arvados_sdk_version": "master"
             }
         },
         "SortSam": {
@@ -58,7 +64,8 @@
                 }
             },
             "runtime_constraints": {
-                "docker_image": "arvados/jobs-java-bwa-samtools"
+                "docker_image": "bcosc/arv-base-java",
+                "arvados_sdk_version": "master"
             }
         }
     }
diff --git a/doc/_includes/_tutorial_cluster_name.liquid b/doc/_includes/_tutorial_cluster_name.liquid
new file mode 100644 (file)
index 0000000..febd240
--- /dev/null
@@ -0,0 +1,3 @@
+{% include 'notebox_begin' %}
+This tutorial assumes you are using the default Arvados instance, @qr1hi@. If you are using a different instance, replace @qr1hi@ with your instance. See "Accessing Arvados Workbench":{{site.baseurl}}/user/getting_started/workbench.html for more details.
+{% include 'notebox_end' %}
index a371d2489a6165e17ffb01d7248559944ba9afd3..333df4452bac965823886cd4608535f20cc2c743 100644 (file)
@@ -1,3 +1,3 @@
 {% include 'notebox_begin' %}
-This tutorial assumes either that you are logged into an Arvados VM instance (instructions for "Unix":{{site.baseurl}}/user/getting_started/ssh-access-unix.html#login or "Windows":{{site.baseurl}}/user/getting_started/ssh-access-windows.html#login) or you have installed the Arvados "Command line SDK":{{site.baseurl}}/sdk/cli/install.html and "Python SDK":{{site.baseurl}}/sdk/python/sdk-python.html on your workstation and have a "working environment.":{{site.baseurl}}/user/getting_started/check-environment.html
+This tutorial assumes that you are logged into an Arvados VM instance (instructions for "Webshell":{{site.baseurl}}/user/getting_started/vm-login-with-webshell.html or "Unix":{{site.baseurl}}/user/getting_started/ssh-access-unix.html#login or "Windows":{{site.baseurl}}/user/getting_started/ssh-access-windows.html#login) or you have installed the Arvados "Command line SDK":{{site.baseurl}}/sdk/cli/install.html and "Python SDK":{{site.baseurl}}/sdk/python/sdk-python.html on your workstation and have a "working environment.":{{site.baseurl}}/user/getting_started/check-environment.html
 {% include 'notebox_end' %}
diff --git a/doc/_includes/_tutorial_expectations_workstation.liquid b/doc/_includes/_tutorial_expectations_workstation.liquid
new file mode 100644 (file)
index 0000000..75fb256
--- /dev/null
@@ -0,0 +1,3 @@
+{% include 'notebox_begin' %}
+This tutorial assumes that you have installed the Arvados "Command line SDK":{{site.baseurl}}/sdk/cli/install.html and "Python SDK":{{site.baseurl}}/sdk/python/sdk-python.html on your workstation and have a "working environment.":{{site.baseurl}}/user/getting_started/check-environment.html
+{% include 'notebox_end' %}
diff --git a/doc/_includes/_tutorial_git_repo_expectations.liquid b/doc/_includes/_tutorial_git_repo_expectations.liquid
new file mode 100644 (file)
index 0000000..c6ff50d
--- /dev/null
@@ -0,0 +1,3 @@
+{% include 'notebox_begin' %}
+This tutorial assumes that you have a working Arvados repository. If you do not have a repository created, you can follow the instructions on the "Adding a new repository":{{site.baseurl}}/user/tutorials/add-new-repository.html page. We will use the *$USER/tutorial* repository created there as the example.
+{% include 'notebox_end' %}
index 57063b3f3b22e98177510eec269151e3a17f8cdf..3ea7602d3f0a78235ad946d25e4caea9ad14cde7 100644 (file)
@@ -2,7 +2,7 @@
   "name":"My md5 pipeline",
   "components":{
     "do_hash":{
-      "repository":"$USER",
+      "repository":"$USER/$USER",
       "script":"hash.py",
       "script_version":"master",
       "runtime_constraints":{
index b0b210f0e7a6397167636f0465d477175441600a..ed232c67734f700a6d686352ed26b7e7a314b021 100644 (file)
@@ -2,7 +2,7 @@
 <html>
   <head>
     <meta charset="utf-8">
-    <title>{% unless page.title == "Arvados" %} Arvados | Documentation | {% endunless %}{{ page.title }}</title>
+    <title>{% unless page.title == "Arvados | Documentation" %} Arvados | Documentation | {% endunless %}{{ page.title }}</title>
     <meta name="viewport" content="width=device-width, initial-scale=1.0">
     <meta name="description" content="">
     <meta name="author" content="">
@@ -14,6 +14,7 @@
     <link href="{{ site.baseurl }}/css/code.css" rel="stylesheet">
     <link href="{{ site.baseurl }}/css/font-awesome.css" rel="stylesheet">
     <link href="{{ site.baseurl }}/css/carousel-override.css" rel="stylesheet">
+    <link href="{{ site.baseurl }}/css/button-override.css" rel="stylesheet">
     <style>
       html {
       height:100%;
index ac68129a5f49318a2e94084220703e34b430bf7d..90a3c4c72216136909ca6fe1f5c074d5298b440f 100644 (file)
@@ -34,7 +34,7 @@ table(table table-bordered table-condensed).
 |minimum_script_version |string     |Git branch, tag, or commit hash specifying the minimum acceptable script version (earliest ancestor) to consider when deciding whether to re-use a past job.[1]|query|@"c3e86c9"@|
 |exclude_script_versions|array of strings|Git commit branches, tags, or hashes to exclude when deciding whether to re-use a past job.|query|@["8f03c71","8f03c71"]@
 @["badtag1","badtag2"]@|
-|filters|array|Conditions to find Jobs to reuse.|query||
+|filters|array of arrays|Conditions to find Jobs to reuse.|query||
 |find_or_create         |boolean    |Before creating, look for an existing job that has identical script, script_version, and script_parameters to those in the present job, has nondeterministic=false, and did not fail (it could be queued, running, or completed). If such a job exists, respond with the existing job instead of submitting a new one.|query|@false@|
 
 When a job is submitted to the queue using the **create** method, the @script_version@ attribute is updated to a full 40-character Git commit hash based on the current content of the specified repository. If @script_version@ cannot be resolved, the job submission is rejected.
@@ -60,8 +60,8 @@ Because Arvados records the exact version of the script, input parameters, and r
 notextile. <div class="spaced-out">
 
 # If @find_or_create@ is false or omitted, create a new job and skip the rest of these steps.
-# If @filters@ are specified, find jobs that match those filters.  Filters *must* be specified to limit the @repository@ and @script@ attributes.  An error is returned if they are missing.
-# If @filters@ are not specified, find jobs with the same @repository@ and @script@, with a @script_version@ between @minimum_script_version@ and @script_version@ (excluding @excluded_script_versions@), and a @docker_image_locator@ with the latest Collection that matches the submitted job's @docker_image@ constraint.  If the submitted job includes an @arvados_sdk_version@ constraint, jobs must have an @arvados_sdk_version@ between that refspec and HEAD to be found.
+# If @filters@ are specified, find jobs that match those filters. If any filters are given, there must be at least one filter on the @repository@ attribute and one on the @script@ attribute; otherwise an error is returned.
+# If @filters@ are not specified, find jobs with the same @repository@ and @script@, with a @script_version@ between @minimum_script_version@ and @script_version@ inclusive (excluding @exclude_script_versions@), and a @docker_image_locator@ with the latest Collection that matches the submitted job's @docker_image@ constraint.  If the submitted job includes an @arvados_sdk_version@ constraint, jobs must have an @arvados_sdk_version@ between that refspec and HEAD to be found. *This form is deprecated: use filters instead.*
 # If the found jobs include a completed job, and all found completed jobs have consistent output, return one of them.  Which specific job is returned is undefined.
 # If the found jobs only include incomplete jobs, return one of them.  Which specific job is returned is undefined.
 # If no job has been returned so far, create and return a new job.
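
A hedged pseudocode sketch of the reuse decision above (illustrative only; match_candidates() and create_job() are hypothetical helpers, not server code):

    def find_or_create_job(req):
        candidates = match_candidates(req)   # steps 2-3 above
        done = [j for j in candidates if j["state"] == "Complete"]
        if done:
            if len(set(j["output"] for j in done)) == 1:
                return done[0]               # step 4: consistent completed output
        elif candidates:
            return candidates[0]             # step 5: only incomplete jobs found
        return create_job(req)               # step 6: nothing reusable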
@@ -76,7 +76,7 @@ Run the script "crunch_scripts/hash.py" in the repository "you" using the "maste
 {
   "job": {
     "script": "hash.py",
-    "repository": "<b>you</b>",
+    "repository": "<b>you</b>/<b>you</b>",
     "script_version": "master",
     "script_parameters": {
       "input": "c1bad4b39ca5a924e481008009d94e32+210"
@@ -92,7 +92,7 @@ Run using exactly the version "d00220fb38d4b85ca8fc28a8151702a2b9d1dec5". Arvado
 {
   "job": {
     "script": "hash.py",
-    "repository": "<b>you</b>",
+    "repository": "<b>you</b>/<b>you</b>",
     "script_version": "d00220fb38d4b85ca8fc28a8151702a2b9d1dec5",
     "script_parameters": {
       "input": "c1bad4b39ca5a924e481008009d94e32+210"
@@ -108,7 +108,7 @@ Arvados should re-use a previous job if the "script_version" of the previous job
 {
   "job": {
     "script": "hash.py",
-    "repository": "<b>you</b>",
+    "repository": "<b>you</b>/<b>you</b>",
     "script_version": "master",
     "script_parameters": {
       "input": "c1bad4b39ca5a924e481008009d94e32+210"
@@ -126,27 +126,27 @@ The same behavior, using filters:
 {
   "job": {
     "script": "hash.py",
-    "repository": "<b>you</b>",
+    "repository": "<b>you</b>/<b>you</b>",
     "script_version": "master",
     "script_parameters": {
       "input": "c1bad4b39ca5a924e481008009d94e32+210"
     }
   },
   "filters": [["script", "=", "hash.py"],
-              ["repository", "=", "<b>you</b>"],
+              ["repository", "=", "<b>you</b>/<b>you</b>"],
               ["script_version", "in git", "earlier_version_tag"],
               ["script_version", "not in git", "blacklisted_version_tag"]],
   "find_or_create": true
 }
 </pre></notextile>
 
-Run the script "crunch_scripts/monte-carlo.py" in the repository "you" using the current "master" commit. Because it is marked as "nondeterministic", this job will not be considered as a suitable candidate for future job submissions that use the "find_or_create" feature.
+Run the script "crunch_scripts/monte-carlo.py" in the repository "you/you" using the current "master" commit. Because it is marked as "nondeterministic", this job will not be considered as a suitable candidate for future job submissions that use the "find_or_create" feature.
 
 <notextile><pre>
 {
   "job": {
     "script": "monte-carlo.py",
-    "repository": "<b>you</b>",
+    "repository": "<b>you</b>/<b>you</b>",
     "script_version": "master",
     "nondeterministic": true,
     "script_parameters": {
index 80f5de6f06ff9eae7b31947bdfc3147605027cae..fd635034b1e8db79b97a1893de07d07f87ed95ee 100644 (file)
@@ -23,7 +23,10 @@ table(table table-bordered table-condensed).
 |_. Attribute|_. Type|_. Description|_. Notes|
 |script|string|The filename of the job script.|This program will be invoked by Crunch for each job task. It is given as a path to an executable file, relative to the @/crunch_scripts@ directory in the Git tree specified by the _repository_ and _script_version_ attributes.|
 |script_parameters|hash|The input parameters for the job.|Conventionally, one of the parameters is called @"input"@. Typically, some parameter values are collection UUIDs. Ultimately, though, the significance of parameters is left entirely up to the script itself.|
-|repository|string|Git repository|Given as the name of a locally hosted Git repository.|
+|repository|string|Git repository name or URL.|Source of the repository where the given script_version is to be found. This can be given as the name of a locally hosted repository, or as a publicly accessible URL starting with @git://@, @http://@, or @https://@.
+Examples:
+@yourusername/yourrepo@
+@https://github.com/curoverse/arvados.git@|
 |script_version|string|Git commit|During a **create** transaction, this is the Git branch, tag, or hash supplied by the client. Before the job starts, Arvados updates it to the full 40-character SHA-1 hash of the commit used by the job.
 See "Specifying Git versions":#script_version below for more detail about acceptable ways to specify a commit.|
 |cancelled_by_client_uuid|string|API client ID|Is null if job has not been cancelled|
index 2b215c2ba37ba52e623dd8003fbc233e7aa53752..444960a6ea46e9f38a389e06814b743cf6a4680d 100644 (file)
@@ -51,7 +51,7 @@ This is a pipeline named "Filter MD5 hash values" with two components, "do_hash"
   "components": {
     "do_hash": {
       "script": "hash.py",
-      "repository": "<b>you</b>",
+      "repository": "<b>you</b>/<b>you</b>",
       "script_version": "master",
       "script_parameters": {
         "input": {
@@ -64,7 +64,7 @@ This is a pipeline named "Filter MD5 hash values" with two components, "do_hash"
     },
     "filter": {
       "script": "0-filter.py",
-      "repository": "<b>you</b>",
+      "repository": "<b>you</b>/<b>you</b>",
       "script_version": "master",
       "script_parameters": {
         "input": {
@@ -84,13 +84,13 @@ This pipeline consists of three components.  The components "thing1" and "thing2
   "components": {
     "cat_in_the_hat": {
       "script": "cat.py",
-      "repository": "<b>you</b>",
+      "repository": "<b>you</b>/<b>you</b>",
       "script_version": "master",
       "script_parameters": { }
     },
     "thing1": {
       "script": "thing1.py",
-      "repository": "<b>you</b>",
+      "repository": "<b>you</b>/<b>you</b>",
       "script_version": "master",
       "script_parameters": {
         "input": {
@@ -100,7 +100,7 @@ This pipeline consists of three components.  The components "thing1" and "thing2
     },
     "thing2": {
       "script": "thing2.py",
-      "repository": "<b>you</b>",
+      "repository": "<b>you</b>/<b>you</b>",
       "script_version": "master",
       "script_parameters": {
         "input": {
@@ -120,19 +120,19 @@ This pipeline consists of three components.  The component "cleanup" depends on
   "components": {
     "thing1": {
       "script": "thing1.py",
-      "repository": "<b>you</b>",
+      "repository": "<b>you</b>/<b>you</b>",
       "script_version": "master",
       "script_parameters": { }
     },
     "thing2": {
       "script": "thing2.py",
-      "repository": "<b>you</b>",
+      "repository": "<b>you</b>/<b>you</b>",
       "script_version": "master",
       "script_parameters": { }
     },
     "cleanup": {
       "script": "cleanup.py",
-      "repository": "<b>you</b>",
+      "repository": "<b>you</b>/<b>you</b>",
       "script_version": "master",
       "script_parameters": {
         "mess1": {
index 0308f7d65c8ae04a575e6f64529c5202dffd9af6..0f9b25ec2ce911be6a0f5a822c82ec5e8bc1e5a8 100644 (file)
@@ -18,6 +18,8 @@ Each Repository has, in addition to the usual "attributes of Arvados resources":
 
 table(table table-bordered table-condensed).
 |_. Attribute|_. Type|_. Description|_. Example|
-|name|string|||
-|fetch_url|string|||
-|push_url|string|||
+|name|string|The name of the repository on disk.  Repository names must begin with a letter and contain only alphanumerics.  Unless the repository is owned by the system user, the name must begin with the owner's username, then be separated from the base repository name with @/@.  You may not create a repository that is owned by a user without a username.|@username/project1@|
+|clone_urls|array|URLs from which the repository can be cloned. Read-only.|@["git@git.zzzzz.arvadosapi.com:foo/bar.git",
+ "https://git.zzzzz.arvadosapi.com/foo/bar.git"]@|
+|fetch_url|string|URL suggested as a fetch-url in git config. Deprecated. Read-only.||
+|push_url|string|URL suggested as a push-url in git config. Deprecated. Read-only.||
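
A brief sketch of reading these attributes through the Python SDK (the repository name is a placeholder):

    import arvados

    # Look up a repository record by name and prefer the read-only
    # clone_urls field over the deprecated fetch_url/push_url.
    api = arvados.api("v1")
    found = api.repositories().list(
        filters=[["name", "=", "username/project1"]]).execute()
    for repo in found["items"]:
        print(repo["name"], repo["clone_urls"])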
index 9a1b0566d4b0861e340e3d198de81ccb60e67e38..335b6c10977fb40dee050e8fd95c59dbdc7c6522 100644 (file)
@@ -19,6 +19,7 @@ Each User has, in addition to the usual "attributes of Arvados resources":{{site
 table(table table-bordered table-condensed).
 |_. Attribute|_. Type|_. Description|_. Example|
 |email|string|||
+|username|string|The username used for the user's git repositories and virtual machine logins.  Usernames must start with a letter, and contain only alphanumerics.  When a new user is created, a default username is set from their e-mail address.  Only administrators may change the username.||
 |first_name|string|||
 |last_name|string|||
 |identity_url|string|||
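
As a hedged illustration of the admin-only restriction on @username@, a change through the Python SDK might look like this (the UUID and username are placeholders, and the call requires an administrator token):

    import arvados

    api = arvados.api("v1")
    api.users().update(uuid="zzzzz-tpzed-xxxxxxxxxxxxxxx",
                       body={"user": {"username": "jdoe"}}).execute()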
diff --git a/doc/css/button-override.css b/doc/css/button-override.css
new file mode 100644 (file)
index 0000000..a0d9c43
--- /dev/null
@@ -0,0 +1,7 @@
+.btn:hover,
+.btn:focus,
+.btn:active,
+.btn.active,
+.open .dropdown-toggle.btn {
+  opacity: 0.4;
+}
diff --git a/doc/images/add-new-repository.png b/doc/images/add-new-repository.png
new file mode 100644 (file)
index 0000000..6193844
Binary files /dev/null and b/doc/images/add-new-repository.png differ
diff --git a/doc/images/added-new-repository.png b/doc/images/added-new-repository.png
new file mode 100644 (file)
index 0000000..7d187aa
Binary files /dev/null and b/doc/images/added-new-repository.png differ
diff --git a/doc/images/api-token-host.png b/doc/images/api-token-host.png
new file mode 100644 (file)
index 0000000..cda04d6
Binary files /dev/null and b/doc/images/api-token-host.png differ
diff --git a/doc/images/publicproject/collection-files.png b/doc/images/publicproject/collection-files.png
new file mode 100644 (file)
index 0000000..6e98459
Binary files /dev/null and b/doc/images/publicproject/collection-files.png differ
diff --git a/doc/images/publicproject/collection-graph.png b/doc/images/publicproject/collection-graph.png
new file mode 100644 (file)
index 0000000..85fc3f3
Binary files /dev/null and b/doc/images/publicproject/collection-graph.png differ
diff --git a/doc/images/publicproject/collection-show.png b/doc/images/publicproject/collection-show.png
new file mode 100644 (file)
index 0000000..b867f49
Binary files /dev/null and b/doc/images/publicproject/collection-show.png differ
diff --git a/doc/images/publicproject/collections.png b/doc/images/publicproject/collections.png
new file mode 100644 (file)
index 0000000..9d85552
Binary files /dev/null and b/doc/images/publicproject/collections.png differ
diff --git a/doc/images/publicproject/description.png b/doc/images/publicproject/description.png
new file mode 100644 (file)
index 0000000..34a31e9
Binary files /dev/null and b/doc/images/publicproject/description.png differ
diff --git a/doc/images/publicproject/instance-advanced.png b/doc/images/publicproject/instance-advanced.png
new file mode 100644 (file)
index 0000000..0b8c3c1
Binary files /dev/null and b/doc/images/publicproject/instance-advanced.png differ
diff --git a/doc/images/publicproject/instance-components.png b/doc/images/publicproject/instance-components.png
new file mode 100644 (file)
index 0000000..f99a94d
Binary files /dev/null and b/doc/images/publicproject/instance-components.png differ
diff --git a/doc/images/publicproject/instance-graph.png b/doc/images/publicproject/instance-graph.png
new file mode 100644 (file)
index 0000000..730f244
Binary files /dev/null and b/doc/images/publicproject/instance-graph.png differ
diff --git a/doc/images/publicproject/instance-job.png b/doc/images/publicproject/instance-job.png
new file mode 100644 (file)
index 0000000..64d5281
Binary files /dev/null and b/doc/images/publicproject/instance-job.png differ
diff --git a/doc/images/publicproject/instance-log.png b/doc/images/publicproject/instance-log.png
new file mode 100644 (file)
index 0000000..54f799b
Binary files /dev/null and b/doc/images/publicproject/instance-log.png differ
diff --git a/doc/images/publicproject/instance-show.png b/doc/images/publicproject/instance-show.png
new file mode 100644 (file)
index 0000000..56f0781
Binary files /dev/null and b/doc/images/publicproject/instance-show.png differ
diff --git a/doc/images/publicproject/instances.png b/doc/images/publicproject/instances.png
new file mode 100644 (file)
index 0000000..75b24b2
Binary files /dev/null and b/doc/images/publicproject/instances.png differ
diff --git a/doc/images/repositories-panel.png b/doc/images/repositories-panel.png
new file mode 100644 (file)
index 0000000..3e12860
Binary files /dev/null and b/doc/images/repositories-panel.png differ
diff --git a/doc/images/vm-access-with-webshell.png b/doc/images/vm-access-with-webshell.png
new file mode 100644 (file)
index 0000000..b980fdc
Binary files /dev/null and b/doc/images/vm-access-with-webshell.png differ
index 53a798ffa6adaa58f2f5fa8b1ff3aaf562c19be1..ca9bb5d1af97771a73d792df1d392af0a1a26c16 100644 (file)
@@ -2,7 +2,7 @@
 layout: default
 no_nav_left: true
 navsection: top
-title: Arvados
+title: Arvados | Documentation
 ...
 
 <div class="jumbotron">
@@ -21,28 +21,52 @@ title: Arvados
 
 <div class="container-fluid">
   <div class="row">
-    <div class="col-sm-5">
+    <div class="col-sm-6">
+      <p><strong>What is Arvados</strong>
       <p><a href="https://arvados.org/">Arvados</a> enables you to quickly begin using cloud computing resources in your data science work. It allows you to track your methods and datasets, share them securely, and easily re-run analyses.
       </p>
-      <p><strong>Quickstart</strong>: Check out our <a href="{{ site.baseurl }}/start/index.html">key features</a>, complete with screenshots, and then follow our tutorial to <a href="{{ site.baseurl }}/start/getting_started/firstpipeline.html">run your first pipeline</a> using our <a href="http://lp.curoverse.com/beta-signup/">public beta</a>.
+      <p><strong>Communications</strong>
+      <p>Read our <a href="https://arvados.org/projects/arvados/blogs">blog updates</a> or look through our <a href="https://arvados.org/projects/arvados/activity">recent developer activity</a>.
       </p>
-      <p><strong>News</strong>: Read our <a href="https://arvados.org/projects/arvados/blogs">blog updates</a> or look through our <a href="https://arvados.org/projects/arvados/activity">recent developer activity</a>.
+      <p>Questions? Email <a href="http://lists.arvados.org/mailman/listinfo/arvados">the mailing list</a>, or chat with us on IRC: <a href="irc://irc.oftc.net:6667/#arvados">#arvados</a> @ OFTC (you can <a href="https://webchat.oftc.net/?channels=arvados">join in your browser</a>).
       </p>
-      <p><strong>Questions?</strong> Email <a href="http://lists.arvados.org/mailman/listinfo/arvados">the mailing list</a>, or chat with us on IRC: <a href="irc://irc.oftc.net:6667/#arvados">#arvados</a> @ OFTC (you can <a href="https://webchat.oftc.net/?channels=arvados">join in your browser</a>).
+      <p><strong>Want to contribute?</strong></p>
+      <p>Check out our <a href="https://arvados.org/projects/arvados">developer site</a>. We're open source, check out our code on <a href="https://github.com/curoverse/arvados">github</a>.
       </p>
-      <p><strong>Want to contribute?</strong> Check out our <a href="https://arvados.org/projects/arvados">developer site</a>. We're open source, check out our code on <a href="https://github.com/curoverse/arvados">github</a>.
-      </p>
-      <p><strong>License</strong>: Arvados is under the copyleft <a href="{{ site.baseurl }}/user/copying/agpl-3.0.html">GNU AGPL v3</a>, with our SDKs under <a href="{{ site.baseurl }}/user/copying/LICENSE-2.0.html">Apache License 2.0</a> (so that you can incorporate proprietary toolchains into your pipelines).
+      <p><strong>License</strong></p>
+      <p>Arvados is under the copyleft <a href="{{ site.baseurl }}/user/copying/agpl-3.0.html">GNU AGPL v3</a>, with our SDKs under <a href="{{ site.baseurl }}/user/copying/LICENSE-2.0.html">Apache License 2.0</a> (so that you can incorporate proprietary toolchains into your pipelines).
       </p>
 
     </div>
-    <div class="col-sm-7" style="border-left: solid; border-width: 1px">
-      <p>Below you can also find more in-depth guides for using Arvados.
+    <div class="col-sm-6" style="border-left: solid; border-width: 1px">
+      <p><strong>Quickstart</strong> 
+      <p>
+        Try any pipeline from the <a href="https://arvados.org/projects/arvados/wiki/Public_Pipelines_and_Datasets">list of public pipelines</a>. For instance, the <a href="http://curover.se/pathomap">Pathomap Pipeline</a> links to these <a href="https://arvados.org/projects/arvados/wiki/pathomap_tutorial/">step-by-step instructions</a> for trying Arvados out right in your browser using Curoverse's <a href="http://lp.curoverse.com/beta-signup/">public Arvados instance</a>.
       </p>
-      <br>
+        <!--<p>-->
+      <!--<ol>-->
+         <!--<li>-->
+           <!--Go to <a href="{{site.arvados_workbench_host}}/" target="_blank">{{site.arvados_workbench_host}}/</a>-->
+        <!--</li><li>-->
+          <!--Register with any Google account-->
+        <!--</li><li>-->
+        <!--Follow the Getting Started guide-->
+        <!--<br>-->
+        <!--<em>Tip: Don't see the guide? You can find it by clicking (in the upper-right corner) <span class="fa fa-lg fa-question-circle"></span> &gt; Getting Started)</em>-->
+        <!--</li>-->
+      <!--</ol>-->
+      <!--</p>-->
+      <p><strong>
+        Pipeline Developer Quickstart
+      </strong></p>
       <p>
-        <a href="{{ site.baseurl }}/start/index.html">Getting Started</a> &mdash; Start here if you're new to Arvados.
+      Want to port your pipeline to Arvados? Check out the step-by-step <a href="https://arvados.org/projects/arvados/wiki/Port_a_Pipeline">Port-a-Pipeline</a> guide on the Arvados wiki.
       </p>
+      <p><strong>More in-depth guides
+      </strong></p>
+      <!--<p>-->
+        <!--<a href="{{ site.baseurl }}/start/index.html">Getting Started</a> &mdash; Start here if you're new to Arvados.-->
+      <!--</p>-->
       <p>
         <a href="{{ site.baseurl }}/user/index.html">User Guide</a> &mdash; How to manage data and do analysis with Arvados.
       </p>
diff --git a/doc/install/copy_pipeline_from_curoverse.html.textile.liquid b/doc/install/copy_pipeline_from_curoverse.html.textile.liquid
new file mode 100644 (file)
index 0000000..fce6605
--- /dev/null
@@ -0,0 +1,63 @@
+---
+layout: default
+navsection: installguide
+title: Copy pipeline from Curoverse cloud
+...
+
+This tutorial describes how to find and copy a publicly shared pipeline from Curoverse cloud. Note that you can use the same steps to copy any template you can access on Curoverse cloud to your cluster.
+
+h3. Access a public pipeline in Curoverse cloud using Workbench
+
+Curoverse cloud provides access to some public data, which can be used to experience Arvados in action. Let's access a public pipeline and copy it to your cluster, so that you can run it in your environment.
+
+Start by visiting the "*Curoverse public projects page*":https://cloud.curoverse.com/projects/public. This page lists all the publicly accessible projects in this Arvados installation. Click on one of these projects to open it. We will use "*lobSTR v.3 (Public)*":https://cloud.curoverse.com/projects/qr1hi-j7d0g-up6qgpqz5ie2vfq as the example in this tutorial.
+
+Once in the "*lobSTR v.3 (Public)*":https://cloud.curoverse.com/projects/qr1hi-j7d0g-up6qgpqz5ie2vfq project, click on the *Pipeline templates* tab. In the pipeline templates tab, you will see a template named *lobSTR v.3*. Click on the <span class="fa fa-lg fa-gears"></span> *Show* button to the left of this name. This will take you to the "*lobSTR v.3*":https://cloud.curoverse.com/pipeline_templates/qr1hi-p5p6p-9pkaxt6qjnkxhhu template page.
+
+Once on this page, you can find the *uuid* of this template in the address bar: *qr1hi-p5p6p-9pkaxt6qjnkxhhu*. Next, we will copy this template to your Arvados instance.
+
+h3. Copying a pipeline template from Curoverse cloud to your cluster
+
+As described above, navigate to the publicly shared pipeline template "*lobSTR v.3*":https://cloud.curoverse.com/pipeline_templates/qr1hi-p5p6p-9pkaxt6qjnkxhhu using Curoverse Workbench.  We will now copy this template with uuid *qr1hi-p5p6p-9pkaxt6qjnkxhhu* to your cluster.
+
+{% include 'tutorial_expectations' %}
+
+We will use the Arvados *arv-copy* command to copy this template to your cluster. In order to use arv-copy, you first need to set up the source and destination cluster configuration files. Here, *qr1hi* is the source cluster and your Arvados instance is the destination, *dst_cluster*.
+
+During this setup, if you have an account in Curoverse cloud, you can use "your access token":#using-your-token to create the source configuration file. If you do not have an account in Curoverse cloud, you can use the "anonymous access token":#using-anonymous-token for the source cluster configuration.
+
+h4(#using-anonymous-token). *Configuring source and destination setup files using anonymous access token*
+
+Configure the source and destination clusters as described in the "*Using arv-copy*":http://doc.arvados.org/user/topics/arv-copy.html tutorial in the user guide, using *5vqmz9mik2ou2k9objb8pnyce8t97p6vocyaouqo3qalvpmjs5* as the API token for the source configuration.
+
+<notextile>
+<pre><code>~$ <span class="userinput">cd ~/.config/arvados</span>
+~$ <span class="userinput">echo "ARVADOS_API_HOST=qr1hi.arvadosapi.com" >> qr1hi.conf</span>
+~$ <span class="userinput">echo "ARVADOS_API_TOKEN=5vqmz9mik2ou2k9objb8pnyce8t97p6vocyaouqo3qalvpmjs5" >> qr1hi.conf</span>
+</code></pre>
+</notextile>
+
+You can now copy the pipeline template from *qr1hi* to *your cluster*. Replace *dst_cluster* with the *uuid_prefix* of your cluster.
+
+<notextile>
+<pre><code>~$ <span class="userinput"> arv-copy --no-recursive --src qr1hi --dst dst_cluster qr1hi-p5p6p-9pkaxt6qjnkxhhu</span>
+</code></pre>
+</notextile>
+
+*Note:* When you use the anonymous access token to copy the template, you will not be able to do a recursive copy, since you cannot provide the dst-git-repo parameter. To perform a recursive copy of the template, use the Arvados API token from your account as explained in the "using your token":#using-your-token section below.
+
+h4(#using-your-token). *Configuring source and destination setup files using personal access token*
+
+If you already have an account in Curoverse cloud, you can follow the instructions in the "*Using arv-copy*":http://doc.arvados.org/user/topics/arv-copy.html user guide to get your *Current token* for the source and destination clusters, and use them to create the source *qr1hi.conf* and destination *dst_cluster.conf* configuration files.
+
+You can now copy the pipeline template from *qr1hi* to *your cluster* with or without recursion. Replace *dst_cluster* with the *uuid_prefix* of your cluster.
+
+*Non-recursive copy:*
+<notextile>
+<pre><code>~$ <span class="userinput"> arv-copy --no-recursive --src qr1hi --dst dst_cluster qr1hi-p5p6p-9pkaxt6qjnkxhhu</span></code></pre>
+</notextile>
+
+*Recursive copy:*
+<notextile>
+<pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst dst_cluster --dst-git-repo $USER/tutorial qr1hi-p5p6p-9pkaxt6qjnkxhhu</span></code></pre>
+</notextile>
index 4e105e82ec392d2a809381b736ee47228c082430..96a838909096e01a393228c3d3b697a144f03ca0 100644 (file)
@@ -1,27 +1,69 @@
 ---
 layout: default
 navsection: installguide
-title: Add an Arvados repository
+title: Create standard objects
 
 ...
 
-Next, we're going to use the Arvados CLI tools on the <strong>shell server</strong> to create a few Arvados objects. These objects set up a hosted clone of the arvados repository on this cluster.
+In these steps we use the Arvados CLI tools on the <strong>shell server</strong> to create a few Arvados objects. The CLI tools require an ARVADOS_API_TOKEN environment variable with a valid admin token. If you haven't already done so, set that up as shown in the "API token guide":../user/reference/api-tokens.html.
 
-This will be readable by the "All users" group, and therefore by every active user. This makes it possible for users to run the bundled Crunch scripts by specifying @"script_version":"master","repository":"arvados"@ rather than pulling the Arvados source tree into their own repositories.
+h3. Arvados repository
+
+Here we create a repository object which will be used to set up a hosted clone of the arvados repository on this cluster.
 
 <notextile>
 <pre><code>~$ <span class="userinput">prefix=`arv --format=uuid user current | cut -d- -f1`</span>
 ~$ <span class="userinput">echo "Site prefix is '$prefix'"</span>
 ~$ <span class="userinput">all_users_group_uuid="$prefix-j7d0g-fffffffffffffff"</span>
-~$ <span class="userinput">repo_uuid=`arv --format=uuid repository create --repository '{"name":"arvados"}'`</span>
+~$ <span class="userinput">repo_uuid=`arv --format=uuid repository create --repository "{\"owner_uuid\":\"$prefix-tpzed-000000000000000\", \"name\":\"arvados\"}"`</span>
 ~$ <span class="userinput">echo "Arvados repository uuid is '$repo_uuid'"</span>
-~$ <span class="userinput">read -rd $'\000' newlink &lt;&lt;EOF; arv link create --link "$newlink"</span>
+</code></pre></notextile>
+
+Create a link object to make the repository object readable by the "All users" group, and therefore by every active user. This makes it possible for users to run the bundled Crunch scripts by specifying @"script_version":"master","repository":"arvados"@ rather than pulling the Arvados source tree into their own repositories.
+
+<notextile>
+<pre><code>~$ <span class="userinput">read -rd $'\000' newlink &lt;&lt;EOF; arv link create --link "$newlink"</span>
 <span class="userinput">{
  "tail_uuid":"$all_users_group_uuid",
  "head_uuid":"$repo_uuid",
  "link_class":"permission",
- "name":"can_read" 
-}                                         
+ "name":"can_read"
+}
 EOF</span>
 </code></pre></notextile>
 
+In a couple of minutes, your arvados-git-sync cron job will create an empty repository on your git server. Seed it with the real arvados repository. If your git credential helpers were configured correctly when you "set up your shell server":install-shell-server.html, the "git push" command will use your API token instead of prompting you for a username and password.
+
+<notextile>
+<pre><code>~$ <span class="userinput">cd /tmp</span>
+/tmp$ <span class="userinput">git clone --bare https://github.com/curoverse/arvados.git</span>
+/tmp <span class="userinput">git --git-dir arvados.git push https://git.<b>uuid_prefix.your.domain</b>/arvados.git '*:*'</span>
+</code></pre>
+</notextile>
+
+If you did not set up a HTTPS service, you can push to <code>git@git.uuid_prefix.your.domain:arvados.git</code> using your SSH key, or by logging in to your git server and using sudo.
+
+<notextile>
+<pre><code>gitserver:~$ <span class="userinput">sudo -u git -i bash</span>
+git@gitserver:~$ <span class="userinput">git clone --bare https://github.com/curoverse/arvados.git /tmp/arvados.git</span>
+git@gitserver:~$ <span class="userinput">cd /tmp/arvados.git</span>
+git@gitserver:/tmp/arvados.git$ <span class="userinput">gitolite push /var/lib/arvados/git/repositories/<b>your_arvados_repo_uuid</b>.git '*:*'</span>
+</code></pre>
+</notextile>
+
+h3. Default project for docker images
+
+Here we create a default project for the standard Arvados Docker images, and give all users read access to it. The project is owned by the system user.
+
+<notextile>
+<pre><code>~$ <span class="userinput">project_uuid=`arv --format=uuid group create --group "{\"owner_uuid\":\"$prefix-tpzed-000000000000000\", \"name\":\"Arvados Standard Docker Images\"}"`</span>
+~$ <span class="userinput">echo "Arvados project uuid is '$project_uuid'"</span>
+~$ <span class="userinput">read -rd $'\000' newlink &lt;&lt;EOF; arv link create --link "$newlink"</span>
+<span class="userinput">{
+ "tail_uuid":"$all_users_group_uuid",
+ "head_uuid":"$project_uuid",
+ "link_class":"permission",
+ "name":"can_read"
+}
+EOF</span>
+</code></pre></notextile>
index 2ae58b987c19aec12e1dec108459b75e6e0ccc87..edd2d854f0914a12b073f7ca6f4889b81fbbe339 100644 (file)
@@ -4,11 +4,8 @@ navsection: installguide
 title: Installation overview
 ...
 
-Arvados components run on GNU/Linux systems, and do not depend on any particular cloud operating stack. It is developed primarily on Debian and Ubuntu GNU/Linux.
+Arvados components run on GNU/Linux systems, and do not depend on any particular cloud operating stack.  Arvados supports Debian and derivatives such as Ubuntu, as well as Red Hat and derivatives such as CentOS.
 
-Arvados components can be installed and configured in a number of different ways. Step-by-step instructions are available for a few specific setups:
-# "Install pre-built docker images":pre-built-docker.html *(quickest)*
-# "Build your own docker images":install-docker.html from source
-# "Manual installation":install-manual-prerequisites.html (most flexible)
+Arvados components can be installed and configured in a number of different ways.  Step-by-step instructions are available to perform a production installation from packages with manual configuration.  This method assumes you have several (virtual) machines at your disposal for running the various Arvados components.
 
-For production use or evaluation at scale, the "manual installation":install-manual-prerequisites.html is more appropriate. This method assumes you have a number of (virtual) machines at your disposal for running the various Arvados components.
+* "Manual installation":install-manual-prerequisites.html
index ef2e474f821bfae65ad280cdc1c1ab5f602bdb16..d2b7566b7d1bfa2b55f7b40ef0e6b2b16ac8d692 100644 (file)
@@ -4,180 +4,344 @@ navsection: installguide
 title: Install the API server
 ...
 
-This installation guide assumes you are on a 64 bit Debian or Ubuntu system.
-
 h2. Install prerequisites
 
-<notextile>
-<pre><code>~$ <span class="userinput">sudo apt-get install \
-    bison build-essential gettext libcurl3 libcurl3-gnutls \
-    libcurl4-openssl-dev libpcre3-dev libpq-dev libreadline-dev \
-    libssl-dev libxslt1.1 postgresql git wget zlib1g-dev
-</span></code></pre></notextile>
+The Arvados package repository includes an API server package that can help automate much of the deployment.
 
-Also make sure you have "Ruby and bundler":install-manual-prerequisites-ruby.html installed.
+h3(#install_ruby_and_bundler). Install Ruby and Bundler
 
-h2. Download the source tree
+{% include 'install_ruby_and_bundler' %}
 
-<notextile>
-<pre><code>~$ <span class="userinput">cd $HOME</span> # (or wherever you want to install)
-~$ <span class="userinput">git clone https://github.com/curoverse/arvados.git</span>
-</code></pre></notextile>
+h3(#install_postgres). Install PostgreSQL
 
-See also: "Downloading the source code":https://arvados.org/projects/arvados/wiki/Download on the Arvados wiki.
+{% include 'install_postgres' %}
 
-The API server is in @services/api@ in the source tree.
+h2(#install_apiserver). Install API server and dependencies
 
-h2. Install gem dependencies
+On a Debian-based system, install the following packages:
 
 <notextile>
-<pre><code>~$ <span class="userinput">cd arvados/services/api</span>
-~/arvados/services/api$ <span class="userinput">bundle install</span>
-</code></pre></notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install bison build-essential libcurl4-openssl-dev git arvados-api-server</span>
+</code></pre>
+</notextile>
+
+On a Red Hat-based system, install the following packages:
 
-h2. Choose your environment
+<notextile>
+<pre><code>~$ <span class="userinput">sudo yum install bison make automake gcc gcc-c++ libcurl-devel git arvados-api-server</span>
+</code></pre>
+</notextile>
 
-The API server can be run in @development@ or in @production@ mode. Unless this installation is going to be used for development on the Arvados API server itself, you should run it in @production@ mode.
+{% include 'install_git' %}
 
-Copy the example environment file for your environment. For example, if you choose @production@:
+h2. Set up the database
+
+Generate a new database password. Nobody ever needs to memorize it or type it, so we'll make a strong one:
 
 <notextile>
-<pre><code>~/arvados/services/api$ <span class="userinput">cp -i config/environments/production.rb.example config/environments/production.rb</span>
+<pre><code>~$ <span class="userinput">ruby -e 'puts rand(2**128).to_s(36)'</span>
+6gqa1vu492idd7yca9tfandj3
 </code></pre></notextile>
 
-h2. Configure the API server
-
-First, copy the example configuration file:
+Create a new database user.
 
 <notextile>
-<pre><code>~/arvados/services/api$ <span class="userinput">cp -i config/application.yml.example config/application.yml</span>
+<pre><code>~$ <span class="userinput">sudo -u postgres createuser --encrypted -R -S --pwprompt arvados</span>
+[sudo] password for <b>you</b>: <span class="userinput">yourpassword</span>
+Enter password for new role: <span class="userinput">paste-password-you-generated</span>
+Enter it again: <span class="userinput">paste-password-again</span>
 </code></pre></notextile>
 
-The API server reads the @config/application.yml@ file, as well as the @config/application.defaults.yml@ file. Values in @config/application.yml@ take precedence over the defaults that are defined in @config/application.defaults.yml@. The @config/application.yml.example@ file is not read by the API server and is provided for installation convenience, only.
+{% include 'notebox_begin' %}
 
-Consult @config/application.default.yml@ for a full list of configuration options. Always put your local configuration in @config/application.yml@, never edit @config/application.default.yml@.
+This user setup assumes that your PostgreSQL is configured to accept password authentication.  Red Hat systems use ident-based authentication by default.  You may need to either adapt the user creation, or reconfigure PostgreSQL (in @pg_hba.conf@) to accept password authentication.
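+
+For example, a @pg_hba.conf@ line along the following lines (placed before any broader @ident@ rules) would allow password authentication for the @arvados@ user from the local machine. This is a sketch; the exact file location and form vary by distribution:
+
+<notextile>
+<pre><code>host    arvados_production    arvados    127.0.0.1/32    md5
+</code></pre>
+</notextile>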
 
-h3(#uuid_prefix). uuid_prefix
+{% include 'notebox_end' %}
 
-Define your @uuid_prefix@ in @config/application.yml@ by setting the @uuid_prefix@ field in the section for your environment.  This prefix is used for all database identifiers to identify the record as originating from this site.  It must be exactly 5 alphanumeric characters (lowercase ASCII letters and digits).
+Create the database:
 
-h3(#git_repositories_dir). git_repositories_dir
+<notextile>
+<pre><code>~$ <span class="userinput">sudo -u postgres createdb arvados_production -T template0 -E UTF8 -O arvados</span>
+</code></pre>
+</notextile>
 
-This field defaults to @/var/lib/arvados/git@. You can override the value by defining it in @config/application.yml@.
+h2. Set up configuration files
 
-Make sure a clone of the arvados repository exists in @git_repositories_dir@.
+The API server package uses configuration files that you write to @/etc/arvados/api@ and ensures they're consistently deployed.  Create this directory and copy the example configuration files to it:
 
 <notextile>
-<pre><code>~/arvados/services/api$ <span class="userinput">sudo mkdir -p /var/lib/arvados/git</span>
-~/arvados/services/api$ <span class="userinput">sudo git clone --bare ../../.git /var/lib/arvados/git/arvados.git</span>
-</code></pre></notextile>
-
-h3. secret_token
+<pre><code>~$ <span class="userinput">sudo mkdir -p /etc/arvados/api</span>
+~$ <span class="userinput">sudo chmod 700 /etc/arvados/api</span>
+~$ <span class="userinput">cd /var/www/arvados-api/current</span>
+/var/www/arvados-api/current$ <span class="userinput">sudo cp config/database.yml.example /etc/arvados/api/database.yml</span>
+/var/www/arvados-api/current$ <span class="userinput">sudo cp config/application.yml.example /etc/arvados/api/application.yml</span>
+</code></pre>
+</notextile>
 
-Generate a new secret token for signing cookies:
+h2. Configure the database connection
 
-<notextile>
-<pre><code>~/arvados/services/api$ <span class="userinput">rake secret</span>
-zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
-</code></pre></notextile>
+Edit @/etc/arvados/api/database.yml@ and replace the @xxxxxxxx@ database password placeholders with the PostgreSQL password you generated above.
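+
+After editing, the production section will look something like the following sketch (your example file may include additional settings):
+
+<notextile>
+<pre><code>production:
+  adapter: postgresql
+  encoding: utf8
+  database: arvados_production
+  username: arvados
+  password: <span class="userinput">your-database-password</span>
+  host: localhost
+</code></pre>
+</notextile>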
 
-Then put that value in the @secret_token@ field.
+h2(#configure_application). Configure the API server
 
-h3. blob_signing_key
+Edit @/etc/arvados/api/application.yml@ to configure the settings described in the following sections.  The deployment script will consistently deploy this to the API server's configuration directory.  The API server reads both @application.yml@ and its own @config/application.default.yml@ file.  The settings in @application.yml@ take precedence over the defaults that are defined in @config/application.default.yml@.  The @config/application.yml.example@ file is not read by the API server and is provided as a starting template only.
 
-If you want access control on your "Keepstore":install-keepstore.html server(s), you should set @blob_signing_key@ to the same value as the permission key you provide to your Keepstore daemon(s).
+@config/application.default.yml@ documents additional configuration settings not listed here.  You can "view the current source version":https://arvados.org/projects/arvados/repository/revisions/master/entry/services/api/config/application.default.yml for reference.
 
-h3. workbench_address
+Only put local configuration in @application.yml@.  Do not edit @application.default.yml@.
 
-Fill in the url of your workbench application in @workbench_address@, for example
+h3(#uuid_prefix). uuid_prefix
 
-&nbsp;&nbsp;https://workbench.@prefix_uuid@.your.domain
+Define your @uuid_prefix@ in @application.yml@ by setting the @uuid_prefix@ field in the section for your environment.  This prefix is used for all database identifiers to identify the record as originating from this site.  It must be exactly 5 alphanumeric characters (lowercase ASCII letters and digits).
 
-h3. other options
+Example @application.yml@:
 
-Consult @application.default.yml@ for a full list of configuration options. Always put your local configuration in @application.yml@ instead of editing @application.default.yml@.
+<notextile>
+<pre><code>  uuid_prefix: <span class="userinput">zzzzz</span></code></pre>
+</notextile>
 
-h2. Set up the database
+h3. secret_token
 
-Generate a new database password. Nobody ever needs to memorize it or type it, so we'll make a strong one:
+The @secret_token@ is used for signing cookies.  IMPORTANT: This is a site secret. It should be at least 50 characters.  Generate a random value and set it in @application.yml@:
 
 <notextile>
-<pre><code>~/arvados/services/api$ <span class="userinput">ruby -e 'puts rand(2**128).to_s(36)'</span>
-6gqa1vu492idd7yca9tfandj3
+<pre><code>~$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)'</span>
+yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy
 </code></pre></notextile>
 
-Create a new database user with permission to create its own databases.
+Example @application.yml@:
 
 <notextile>
-<pre><code>~/arvados/services/api$ <span class="userinput">sudo -u postgres createuser --createdb --encrypted -R -S --pwprompt arvados</span>
-[sudo] password for <b>you</b>: <span class="userinput">yourpassword</span>
-Enter password for new role: <span class="userinput">paste-password-you-generated</span>
-Enter it again: <span class="userinput">paste-password-again</span>
-</code></pre></notextile>
+<pre><code>  secret_token: <span class="userinput">yyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyyy</span></code></pre>
+</notextile>
 
-Configure API server to connect to your database by creating and updating @config/database.yml@. Replace the @xxxxxxxx@ database password placeholders with the new password you generated above.
+h3(#blob_signing_key). blob_signing_key
+
+The @blob_signing_key@ is used to enforce access control to Keep blocks.  This same key must be provided to the Keepstore daemons when "installing Keepstore servers.":install-keepstore.html  IMPORTANT: This is a site secret. It should be at least 50 characters.  Generate a random value and set it in @application.yml@:
 
 <notextile>
-<pre><code>~/arvados/services/api$ <span class="userinput">cp -i config/database.yml.sample config/database.yml</span>
-~/arvados/services/api$ <span class="userinput">edit config/database.yml</span>
+<pre><code>~$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)'</span>
+xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 </code></pre></notextile>
 
-Create and initialize the database. If you are planning a production system, choose the @production@ rails environment, otherwise use @development@.
+Example @application.yml@:
 
 <notextile>
-<pre><code>~/arvados/services/api$ <span class="userinput">RAILS_ENV=production bundle exec rake db:setup</span>
-</code></pre></notextile>
+<pre><code>  blob_signing_key: <span class="userinput">xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx</span></code></pre>
+</notextile>
+
+h3(#omniauth). sso_app_secret, sso_app_id, sso_provider_url
+
+The following settings enable the API server to communicate with the "Single Sign On (SSO) server":install-sso.html to authenticate user log in.
+
+Set @sso_provider_url@ to the base URL where your SSO server is installed.  This should be a URL consisting of the scheme and host (and optionally, port), without a trailing slash.
 
-Alternatively, if the database user you intend to use for the API server is not allowed to create new databases, you can create the database first and then populate it with rake. Be sure to adjust the database name if you are using the @development@ environment. This sequence of commands is functionally equivalent to the rake db:setup command above.
+Set @sso_app_secret@ and @sso_app_id@ to the corresponding values for @app_secret@ and @app_id@ used in the "Create arvados-server client for Single Sign On (SSO)":install-sso.html#client step.
+
+Example @application.yml@:
 
 <notextile>
-<pre><code>~/arvados/services/api$ <span class="userinput">su postgres createdb arvados_production -E UTF8 -O arvados</span>
-~/arvados/services/api$ <span class="userinput">RAILS_ENV=production bundle exec rake db:structure:load</span>
-~/arvados/services/api$ <span class="userinput">RAILS_ENV=production bundle exec rake db:seed</span>
-</code></pre></notextile>
+<pre><code>  sso_app_id: <span class="userinput">arvados-server</span>
+  sso_app_secret: <span class="userinput">wwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwwww</span>
+  sso_provider_url: <span class="userinput">https://sso.example.com</span>
+</code></pre>
+</notextile>
+
+h3. workbench_address
+
+Set @workbench_address@ to the URL of your workbench application after following "Install Workbench.":install-workbench-app.html
+
+Example @application.yml@:
 
-{% include 'notebox_begin' %}
-You can safely ignore the following error message you may see when loading the database structure:
 <notextile>
-<pre><code>ERROR:  must be owner of extension plpgsql</code></pre></notextile>
-{% include 'notebox_end' %}
+<pre><code>  workbench_address: <span class="userinput">https://workbench.zzzzz.example.com</span></code></pre>
+</notextile>
+
+h3. websocket_address
+
+Set @websocket_address@ to the @wss://@ URL of the API server websocket endpoint after following "Set up Web servers":#set_up.  The path of the default endpoint is @/websocket@.
+
+Example @application.yml@:
+
+<notextile>
+<pre><code>  websocket_address: <span class="userinput">wss://ws.zzzzz.example.com</span>/websocket</code></pre>
+</notextile>
 
-h2(#omniauth). Set up omniauth
+h3(#git_repositories_dir). git_repositories_dir
+
+The @git_repositories_dir@ setting specifies the directory where user git repositories will be stored.
 
-First copy the omniauth configuration file:
+The git server setup process is covered on "its own page":install-arv-git-httpd.html. For now, create an empty directory in the default location:
 
 <notextile>
-<pre><code>~/arvados/services/api$ <span class="userinput">cp -i config/initializers/omniauth.rb.example config/initializers/omniauth.rb
+<pre><code>~$ <span class="userinput">sudo mkdir -p /var/lib/arvados/git/repositories</span>
 </code></pre></notextile>
 
-Edit @config/initializers/omniauth.rb@ to configure the SSO server for authentication.  @APP_ID@ and @APP_SECRET@ correspond to the @app_id@ and @app_secret@ set in "Create arvados-server client for Single Sign On (SSO)":install-sso.html#client and @CUSTOM_PROVIDER_URL@ is the address of your SSO server.
+If you intend to store your git repositories in a different location, specify that location in @application.yml@.
+
+Default setting in @application.default.yml@:
 
 <notextile>
-<pre><code>APP_ID = 'arvados-server'
-APP_SECRET = 'xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx'
-CUSTOM_PROVIDER_URL = 'https://sso.example.com/'
+<pre><code>  git_repositories_dir: <span class="userinput">/var/lib/arvados/git/repositories</span>
 </code></pre>
 </notextile>
 
-h2. Start the API server
+h3(#git_internal_dir). git_internal_dir
 
-h3. Development environment
+The @git_internal_dir@ setting specifies the location of Arvados' internal git repository.  By default this is @/var/lib/arvados/internal.git@.  This repository stores git commits that have been used to run Crunch jobs.  It should _not_ be a subdirectory of @git_repositories_dir@.
 
-If you plan to run in development mode, you can now run the development server this way:
+Example @application.yml@:
 
 <notextile>
-<pre><code>~/arvados/services/api$ <span class="userinput">bundle exec rails server --port=3030
-</code></pre></notextile>
+<pre><code>  git_internal_dir: <span class="userinput">/var/lib/arvados/internal.git</span>
+</code></pre>
+</notextile>
 
-h3. Production environment
+h2. Prepare the API server deployment
 
-We recommend "Passenger":https://www.phusionpassenger.com/ to run the API server in production.
+Now that all your configuration is in place, run @/usr/local/bin/arvados-api-server-upgrade.sh@.  This will install and check your configuration, install necessary gems, and run any necessary database setup.
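+
+For example, assuming the script needs root privileges for a system-wide deployment, invoke it with @sudo@:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo /usr/local/bin/arvados-api-server-upgrade.sh</span>
+</code></pre>
+</notextile>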
 
-Point it to the services/api directory in the source tree.
+{% include 'notebox_begin' %}
+You can safely ignore the following messages if they appear while this script runs:
+<pre>Don't run Bundler as root. Bundler can ask for sudo if it is needed, and installing your bundle as root will
+break this application for all non-root users on this machine.</pre>
+<pre>fatal: Not a git repository (or any of the parent directories): .git</pre>
+{% include 'notebox_end' %}
+
+This command aborts when it encounters an error.  It's safe to rerun multiple times, so if there's a problem with your configuration, you can fix that and try again.
 
-To enable streaming so users can monitor crunch jobs in real time, make sure to add the following to your Passenger configuration:
+h2(#set_up). Set up Web servers
+
+For best performance, we recommend you use Nginx as your Web server front-end, with a Passenger backend for the main API server and a Puma backend for API server Websockets.  To do that:
 
 <notextile>
-<pre><code><span class="userinput">PassengerBufferResponse off</span>
+<ol>
+<li><a href="https://www.phusionpassenger.com/documentation/Users%20guide%20Nginx.html">Install Nginx and Phusion Passenger</a>.</li>
+
+<li><p>Puma is already included with the API server's gems.  We recommend you run it as a service under <a href="http://smarden.org/runit/">runit</a> or a similar tool.  Here's a sample runit script for that:</p>
+
+<pre><code>#!/bin/bash
+
+set -e
+exec 2>&1
+
+# Uncomment the line below if you're using RVM.
+#source /etc/profile.d/rvm.sh
+
+envdir="`pwd`/env"
+mkdir -p "$envdir"
+echo ws-only > "$envdir/ARVADOS_WEBSOCKETS"
+
+cd /var/www/arvados-api/current
+echo "Starting puma in `pwd`"
+
+# Change the arguments below to match your deployment: "webserver-user" and
+# "webserver-group" should be the user and group of the web server
+# process.  This is typically "www-data:www-data" on Debian systems by default;
+# other systems may use different defaults, such as the name of the web server
+# software (for example, "nginx:nginx").
+exec chpst -m 1073741824 -u webserver-user:webserver-group -e "$envdir" \
+  bundle exec puma -t 0:512 -e production -b tcp://127.0.0.1:8100
+</code></pre>
+</li>
+
+<li><p>Edit the http section of your Nginx configuration to run the Passenger server, and act as a front-end for both it and Puma.  You might add a block like the following, adding SSL and logging parameters to taste:</p>
+
+<pre><code>server {
+  listen 127.0.0.1:8000;
+  server_name localhost-api;
+
+  root /var/www/arvados-api/current/public;
+  index  index.html index.htm index.php;
+
+  passenger_enabled on;
+  # If you're using RVM, uncomment the line below.
+  #passenger_ruby /usr/local/rvm/wrappers/default/ruby;
+}
+
+upstream api {
+  server     127.0.0.1:8000  fail_timeout=10s;
+}
+
+upstream websockets {
+  # The address below must match the one specified in puma's -b option.
+  server     127.0.0.1:8100  fail_timeout=10s;
+}
+
+proxy_http_version 1.1;
+
+# When Keep clients request a list of Keep services from the API server, the
+# server will automatically return the list of available proxies if
+# the request headers include X-External-Client: 1.  Following the example
+# here, at the end of this section, add a line for each netmask that has
+# direct access to Keep storage daemons to set this header value to 0.
+geo $external_client {
+  default        1;
+  <span class="userinput">10.20.30.0/24</span>  0;
+}
+
+server {
+  listen       <span class="userinput">[your public IP address]</span>:443 ssl;
+  server_name  <span class="userinput">uuid_prefix.your.domain</span>;
+
+  ssl on;
+  ssl_certificate     <span class="userinput">/YOUR/PATH/TO/cert.pem</span>;
+  ssl_certificate_key <span class="userinput">/YOUR/PATH/TO/cert.key</span>;
+
+  index  index.html index.htm index.php;
+
+  # This value effectively limits the size of API objects users can create,
+  # especially collections.  If you change this, you should also set
+  # `max_request_size` in the API server's application.yml file to the same
+  # value.
+  client_max_body_size 128m;
+
+  location / {
+    proxy_pass            http://api;
+    proxy_redirect        off;
+    proxy_connect_timeout 90s;
+    proxy_read_timeout    300s;
+
+    proxy_set_header      X-Forwarded-Proto https;
+    proxy_set_header      Host $http_host;
+    proxy_set_header      X-External-Client $external_client;
+    proxy_set_header      X-Real-IP $remote_addr;
+    proxy_set_header      X-Forwarded-For $proxy_add_x_forwarded_for;
+  }
+}
+
+server {
+  listen       <span class="userinput">[your public IP address]</span>:443 ssl;
+  server_name  ws.<span class="userinput">uuid_prefix.your.domain</span>;
+
+  ssl on;
+  ssl_certificate     <span class="userinput">/YOUR/PATH/TO/cert.pem</span>;
+  ssl_certificate_key <span class="userinput">/YOUR/PATH/TO/cert.key</span>;
+
+  index  index.html index.htm index.php;
+
+  location / {
+    proxy_pass            http://websockets;
+    proxy_redirect        off;
+    proxy_connect_timeout 90s;
+    proxy_read_timeout    300s;
+
+    proxy_set_header      Upgrade $http_upgrade;
+    proxy_set_header      Connection "upgrade";
+    proxy_set_header      Host $host;
+    proxy_set_header      X-Real-IP $remote_addr;
+    proxy_set_header      X-Forwarded-For $proxy_add_x_forwarded_for;
+  }
+}
 </code></pre>
+</li>
+
+<li><p>Restart Nginx:</p>
+
+<pre><code>~$ <span class="userinput">sudo nginx -s reload</span>
+</code></pre>
+
+</li>
+
+</ol>
 </notextile>
diff --git a/doc/install/install-arv-git-httpd.html.textile.liquid b/doc/install/install-arv-git-httpd.html.textile.liquid
new file mode 100644 (file)
index 0000000..1c31dc4
--- /dev/null
@@ -0,0 +1,366 @@
+---
+layout: default
+navsection: installguide
+title: Install the Git server
+...
+
+Arvados allows users to create their own private and public git repositories, and clone/push them using SSH and HTTPS.
+
+The git hosting setup involves three components.
+* The "arvados-git-sync.rb" script polls the API server for the current list of repositories, creates bare repositories, and updates the local permission cache used by gitolite.
+* Gitolite provides SSH access.
+* arvados-git-httpd provides HTTPS access.
+
+It is not strictly necessary to deploy _both_ SSH and HTTPS access, but we recommend deploying both:
+* SSH is a more appropriate way to authenticate from a user's workstation because it does not require managing tokens on the client side;
+* HTTPS is a more appropriate way to authenticate from a shell VM because it does not depend on SSH agent forwarding (SSH clients' agent forwarding features tend to behave as if the remote machine is fully trusted).
+
+The HTTPS instructions given below will not work if you skip the SSH setup steps.
+
+h2. Set up DNS
+
+By convention, we use the following hostname for the git service:
+
+<notextile>
+<pre><code>git.<span class="userinput">uuid_prefix</span>.your.domain
+</code></pre>
+</notextile>
+
+{% include 'notebox_begin' %}
+Here, we show how to install the git hosting services *on the same host as your API server.* Using a different host is not yet fully supported. On this page we will refer to it as your git server.
+{% include 'notebox_end' %}
+
+DNS and network configuration should be set up so port 443 reaches your HTTPS proxy, and port 22 reaches the OpenSSH service on your git server.
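+
+For example, a zone file entry along these lines would do, where @192.0.2.10@ is a placeholder for the address of your HTTPS proxy:
+
+<notextile>
+<pre><code>git.<span class="userinput">uuid_prefix</span>.your.domain.  IN  A  <span class="userinput">192.0.2.10</span>
+</code></pre>
+</notextile>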
+
+h2. Generate an API token
+
+Use the following command to generate an API token, changing *@webserver-user@* to the user of the web server process.  This is typically *@www-data@* on Debian systems by default; other systems may use different defaults, such as the name of the web server software (for example, *@nginx@*).
+
+Using RVM:
+
+<notextile>
+<pre><code>gitserver:~$ <span class="userinput">cd /var/www/arvados-api/current</span>
+gitserver:/var/www/arvados-api/current$ <span class="userinput">sudo -u <b>webserver-user</b> RAILS_ENV=production `which rvm-exec` default bundle exec ./script/create_superuser_token.rb</span>
+zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+</code></pre>
+</notextile>
+
+Not using RVM:
+
+<notextile>
+<pre><code>gitserver:~$ <span class="userinput">cd /var/www/arvados-api/current</span>
+gitserver:/var/www/arvados-api/current$ <span class="userinput">sudo -u <b>webserver-user</b> RAILS_ENV=production bundle exec ./script/create_superuser_token.rb</span>
+zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
+</code></pre>
+</notextile>
+
+Copy that token; you'll need it in a minute.
+
+h2. Install git and other dependencies
+
+On Debian-based systems:
+
+<notextile>
+<pre><code>gitserver:~$ <span class="userinput">sudo apt-get install git openssh-server</span>
+</code></pre>
+</notextile>
+
+On Red Hat-based systems:
+
+<notextile>
+<pre><code>gitserver:~$ <span class="userinput">sudo yum install git perl-Data-Dumper openssh-server</span>
+</code></pre>
+</notextile>
+
+{% include 'install_git' %}
+
+h2. Create a "git" user and a storage directory
+
+Gitolite and some additional scripts will be installed in @/var/lib/arvados/git@, which means hosted repository data will be stored in @/var/lib/arvados/git/repositories@. If you choose to install gitolite in a different location, make sure to update the @git_repositories_dir@ entry in your API server's @application.yml@ file accordingly: for example, if you install gitolite at @/data/gitolite@ then your @git_repositories_dir@ will be @/data/gitolite/repositories@.
+
+A new UNIX account called "git" will own the files. This makes git URLs look familiar to users (<code>git@[...]:username/reponame.git</code>).
+
+On Debian- or Red Hat-based systems:
+
+<notextile>
+<pre><code>gitserver:~$ <span class="userinput">sudo mkdir -p /var/lib/arvados/git</span>
+gitserver:~$ <span class="userinput">sudo useradd --comment git --home-dir /var/lib/arvados/git git</span>
+gitserver:~$ <span class="userinput">sudo chown -R git:git ~git</span>
+</code></pre>
+</notextile>
+
+The git user needs its own SSH key. (It must be able to run <code>ssh git@localhost</code> from scripts.)
+
+<notextile>
+<pre><code>gitserver:~$ <span class="userinput">sudo -u git -i bash</span>
+git@gitserver:~$ <span class="userinput">ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa</span>
+git@gitserver:~$ <span class="userinput">cp .ssh/id_rsa.pub .ssh/authorized_keys</span>
+git@gitserver:~$ <span class="userinput">ssh -o stricthostkeychecking=no localhost cat .ssh/id_rsa.pub</span>
+Warning: Permanently added 'localhost' (ECDSA) to the list of known hosts.
+ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC7aBIDAAgMQN16Pg6eHmvc+D+6TljwCGr4YGUBphSdVb25UyBCeAEgzqRiqy0IjQR2BLtSirXr+1SJAcQfBgI/jwR7FG+YIzJ4ND9JFEfcpq20FvWnMMQ6XD3y3xrZ1/h/RdBNwy4QCqjiXuxDpDB7VNP9/oeAzoATPZGhqjPfNS+RRVEQpC6BzZdsR+S838E53URguBOf9yrPwdHvosZn7VC0akeWQerHqaBIpSfDMtaM4+9s1Gdsz0iP85rtj/6U/K/XOuv2CZsuVZZ52nu3soHnEX2nx2IaXMS3L8Z+lfOXB2T6EaJgXF7Z9ME5K1tx9TSNTRcYCiKztXLNLSbp git@gitserver
+git@gitserver:~$ <span class="userinput">rm .ssh/authorized_keys</span>
+</code></pre>
+</notextile>
+
+h2. Install gitolite
+
+Check "https://github.com/sitaramc/gitolite/tags":https://github.com/sitaramc/gitolite/tags for the latest stable version. This guide was tested with @v3.6.3@. _Versions below 3.0 are missing some features needed by Arvados, and should not be used._
+
+Download and install the version you selected.
+
+<notextile>
+<pre><code>git@gitserver:~$ <span class="userinput">echo 'PATH=$HOME/bin:$PATH' &gt;.profile</span>
+git@gitserver:~$ <span class="userinput">source .profile</span>
+git@gitserver:~$ <span class="userinput">git clone --branch <b>v3.6.3</b> git://github.com/sitaramc/gitolite</span>
+...
+Note: checking out '5d24ae666bfd2fa9093d67c840eb8d686992083f'.
+...
+git@gitserver:~$ <span class="userinput">mkdir bin</span>
+git@gitserver:~$ <span class="userinput">gitolite/install -ln ~git/bin</span>
+git@gitserver:~$ <span class="userinput">bin/gitolite setup -pk .ssh/id_rsa.pub</span>
+Initialized empty Git repository in /var/lib/arvados/git/repositories/gitolite-admin.git/
+Initialized empty Git repository in /var/lib/arvados/git/repositories/testing.git/
+WARNING: /var/lib/arvados/git/.ssh/authorized_keys missing; creating a new one
+    (this is normal on a brand new install)
+</code></pre>
+</notextile>
+
+_If this didn't go well, more detail about installing gitolite, and information about how it works, can be found on the "gitolite home page":http://gitolite.com/._
+
+Clone the gitolite-admin repository. The arvados-git-sync.rb script works by editing the files in this working directory and pushing them to gitolite. Here we make sure "git push" won't produce any errors or warnings.
+
+<notextile>
+<pre><code>git@gitserver:~$ <span class="userinput">git clone git@localhost:gitolite-admin</span>
+Cloning into 'gitolite-admin'...
+remote: Counting objects: 6, done.
+remote: Compressing objects: 100% (4/4), done.
+remote: Total 6 (delta 0), reused 0 (delta 0)
+Receiving objects: 100% (6/6), done.
+Checking connectivity... done.
+git@gitserver:~$ <span class="userinput">cd gitolite-admin</span>
+git@gitserver:~/gitolite-admin$ <span class="userinput">git config user.email arvados</span>
+git@gitserver:~/gitolite-admin$ <span class="userinput">git config user.name arvados</span>
+git@gitserver:~/gitolite-admin$ <span class="userinput">git config push.default simple</span>
+git@gitserver:~/gitolite-admin$ <span class="userinput">git push</span>
+Everything up-to-date
+</code></pre>
+</notextile>
+
+h3. Configure gitolite
+
+Configure gitolite to look up a repository name like @username/reponame.git@ and find the appropriate bare repository storage directory.
+
+Add the following lines to the top of @~git/.gitolite.rc@:
+
+<notextile>
+<pre><code><span class="userinput">my $repo_aliases;
+my $aliases_src = "$ENV{HOME}/.gitolite/arvadosaliases.pl";
+if ($ENV{HOME} && (-e $aliases_src)) {
+    $repo_aliases = do $aliases_src;
+}
+$repo_aliases ||= {};
+</span></code></pre>
+</notextile>
+
+Add the following lines inside the section that begins @%RC = (@:
+
+<notextile>
+<pre><code><span class="userinput">    REPO_ALIASES => $repo_aliases,
+</span></code></pre>
+</notextile>
+
+Inside that section, adjust the 'UMASK' setting to @022@, to ensure the API server has permission to read repositories:
+
+<notextile>
+<pre><code>    UMASK => <span class="userinput">022</span>,
+</code></pre>
+</notextile>
+
+Uncomment the 'Alias' line in the section that begins @ENABLE => [@:
+
+<notextile>
+<pre><code><span class="userinput">            # access a repo by another (possibly legacy) name
+            'Alias',
+</span></code></pre>
+</notextile>
+
+h2. Configure git synchronization
+
+Create a configuration file @/var/www/arvados-api/current/config/arvados-clients.yml@ using the following template, filling in the appropriate values for your system.
+* For @arvados_api_token@, use the token you generated above.
+* For @gitolite_arvados_git_user_key@, provide the public key you generated above, i.e., the contents of @~git/.ssh/id_rsa.pub@.
+
+<notextile>
+<pre><code>production:
+  gitolite_url: /var/lib/arvados/git/repositories/gitolite-admin.git
+  gitolite_tmp: /var/lib/arvados/git
+  arvados_api_host: <span class="userinput">uuid_prefix.example.com</span>
+  arvados_api_token: "<span class="userinput">zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz</span>"
+  arvados_api_host_insecure: <span class="userinput">false</span>
+  gitolite_arvados_git_user_key: "<span class="userinput">ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC7aBIDAAgMQN16Pg6eHmvc+D+6TljwCGr4YGUBphSdVb25UyBCeAEgzqRiqy0IjQR2BLtSirXr+1SJAcQfBgI/jwR7FG+YIzJ4ND9JFEfcpq20FvWnMMQ6XD3y3xrZ1/h/RdBNwy4QCqjiXuxDpDB7VNP9/oeAzoATPZGhqjPfNS+RRVEQpC6BzZdsR+S838E53URguBOf9yrPwdHvosZn7VC0akeWQerHqaBIpSfDMtaM4+9s1Gdsz0iP85rtj/6U/K/XOuv2CZsuVZZ52nu3soHnEX2nx2IaXMS3L8Z+lfOXB2T6EaJgXF7Z9ME5K1tx9TSNTRcYCiKztXLNLSbp git@gitserver</span>"
+</code></pre>
+</notextile>
+
+h3. Enable the synchronization script
+
+The API server package includes a script that retrieves the current set of repository names and permissions from the API, writes them to @arvadosaliases.pl@ in a format usable by gitolite, and triggers gitolite hooks which create new empty repositories if needed. This script should run every 2 to 5 minutes.
+
+If you are using RVM, create @/etc/cron.d/arvados-git-sync@ with the following content:
+
+<notextile>
+<pre><code><span class="userinput">*/5 * * * * git cd /var/www/arvados-api/current && /usr/local/rvm/bin/rvm-exec default bundle exec script/arvados-git-sync.rb production</span>
+</code></pre>
+</notextile>
+
+Otherwise, create @/etc/cron.d/arvados-git-sync@ with the following content:
+
+<notextile>
+<pre><code><span class="userinput">*/5 * * * * git cd /var/www/arvados-api/current && bundle exec script/arvados-git-sync.rb production</span>
+</code></pre>
+</notextile>
+
+h3. Configure the API server to advertise the correct SSH URLs
+
+In your API server's @application.yml@ file, add the following entry:
+
+<notextile>
+<pre><code>git_repo_ssh_base: "git@git.<span class="userinput">uuid_prefix.your.domain</span>:"
+</code></pre>
+</notextile>
+
+Make sure to include the trailing colon.
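+
+With this setting, the API server will advertise SSH clone URLs of the following form, where @username/reponame@ is a hypothetical repository:
+
+<notextile>
+<pre><code>~$ <span class="userinput">git clone git@git.<b>uuid_prefix.your.domain</b>:<b>username/reponame</b>.git</span>
+</code></pre>
+</notextile>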
+
+h2. Install the arvados-git-httpd package
+
+This is needed only for HTTPS access.
+
+The arvados-git-httpd package provides HTTP access, using Arvados authentication tokens instead of passwords. It is intended to be installed on the system where your git repositories are stored, and accessed through a web proxy that provides SSL support.
+
+On Debian-based systems:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install git arvados-git-httpd</span>
+</code></pre>
+</notextile>
+
+On Red Hat-based systems:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo yum install git arvados-git-httpd</span>
+</code></pre>
+</notextile>
+
+Verify that @arvados-git-httpd@ and @git-http-backend@ can be run:
+
+<notextile>
+<pre><code>~$ <span class="userinput">arvados-git-httpd -h</span>
+Usage of arvados-git-httpd:
+  -address="0.0.0.0:80": Address to listen on, "host:port".
+  -git-command="/usr/bin/git": Path to git executable. Each authenticated request will execute this program with a single argument, "http-backend".
+  -repo-root="/path/to/cwd": Path to git repositories.
+~$ <span class="userinput">git http-backend</span>
+Status: 500 Internal Server Error
+Expires: Fri, 01 Jan 1980 00:00:00 GMT
+Pragma: no-cache
+Cache-Control: no-cache, max-age=0, must-revalidate
+
+fatal: No REQUEST_METHOD from server
+</code></pre>
+</notextile>
+
+h3. Enable arvados-git-httpd
+
+On Debian-based systems, install runit:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install runit</span>
+</code></pre>
+</notextile>
+
+On Red Hat-based systems, "install runit from source":http://smarden.org/runit/install.html or use an alternative daemon supervisor.
+
+Configure runit to run arvados-git-httpd, making sure to update the API host to match your site:
+
+<notextile>
+<pre><code>~$ <span class="userinput">cd /etc/sv</span>
+/etc/sv$ <span class="userinput">sudo mkdir arvados-git-httpd; cd arvados-git-httpd</span>
+/etc/sv/arvados-git-httpd$ <span class="userinput">sudo mkdir log</span>
+/etc/sv/arvados-git-httpd$ <span class="userinput">sudo sh -c 'cat &gt;log/run' &lt;&lt;'EOF'
+#!/bin/sh
+mkdir -p main
+chown git:git main
+exec chpst -u git:git svlogd -tt main
+EOF</span>
+/etc/sv/arvados-git-httpd$ <span class="userinput">sudo sh -c 'cat &gt;run' &lt;&lt;'EOF'
+#!/bin/sh
+export ARVADOS_API_HOST=<b>uuid_prefix.your.domain</b>
+export GITOLITE_HTTP_HOME=/var/lib/arvados/git
+export GL_BYPASS_ACCESS_CHECKS=1
+export PATH="$PATH:/var/lib/arvados/git/bin"
+exec chpst -u git:git arvados-git-httpd -address=:9001 -git-command=/var/lib/arvados/git/gitolite/src/gitolite-shell -repo-root=<b>/var/lib/arvados/git</b>/repositories 2&gt;&amp;1
+EOF</span>
+/etc/sv/arvados-git-httpd$ <span class="userinput">sudo chmod +x run log/run</span>
+</code></pre>
+</notextile>
+
+If you are using a different daemon supervisor, or if you want to test the daemon in a terminal window, an equivalent shell command to run arvados-git-httpd is:
+
+<notextile>
+<pre><code>sudo -u git \
+  ARVADOS_API_HOST=<span class="userinput">uuid_prefix.your.domain</span> \
+  GITOLITE_HTTP_HOME=/var/lib/arvados/git \
+  GL_BYPASS_ACCESS_CHECKS=1 \
+  PATH="$PATH:/var/lib/arvados/git/bin" \
+  arvados-git-httpd -address=:9001 -git-command=/var/lib/arvados/git/gitolite/src/gitolite-shell -repo-root=/var/lib/arvados/git/repositories 2&gt;&amp;1
+</code></pre>
+</notextile>
+
+h3. Set up a reverse proxy to provide SSL service
+
+The arvados-git-httpd service will be accessible from anywhere on the internet, so we recommend using SSL.
+
+This is best achieved by putting a reverse proxy with SSL support in front of arvados-git-httpd, running on port 443 and passing requests to @arvados-git-httpd@ on port 9001 (or whichever port you used in your run script).
+
+Add the following configuration to the @http@ section of your Nginx configuration:
+
+<notextile>
+<pre><code>
+upstream arvados-git-httpd {
+  server                  127.0.0.1:<span class="userinput">9001</span>;
+}
+server {
+  listen                  <span class="userinput">[your public IP address]</span>:443 ssl;
+  server_name             git.<span class="userinput">uuid_prefix.your.domain</span>;
+  proxy_connect_timeout   90s;
+  proxy_read_timeout      300s;
+
+  ssl on;
+  ssl_certificate         <span class="userinput">/YOUR/PATH/TO/cert.pem</span>;
+  ssl_certificate_key     <span class="userinput">/YOUR/PATH/TO/cert.key</span>;
+
+  location  / {
+    proxy_pass            http://arvados-git-httpd;
+  }
+}
+</code></pre>
+</notextile>
+
+h3. Configure the API server to advertise the correct HTTPS URLs
+
+In your API server's @application.yml@ file, add the following entry:
+
+<notextile>
+<pre><code>git_repo_http_base: https://git.<span class="userinput">uuid_prefix.your.domain</span>/
+</code></pre>
+</notextile>
+
+Make sure to include the trailing slash.
+
+h2. Restart Nginx
+
+Restart Nginx to make the Nginx and API server configuration changes take effect.
+
+<notextile>
+<pre><code>gitserver:~$ <span class="userinput">sudo nginx -s reload</span>
+</code></pre>
+</notextile>
diff --git a/doc/install/install-compute-node.html.textile.liquid b/doc/install/install-compute-node.html.textile.liquid
new file mode 100644 (file)
index 0000000..250d1dc
--- /dev/null
@@ -0,0 +1,182 @@
+---
+layout: default
+navsection: installguide
+title: Install a compute node
+...
+
+h2. Install dependencies
+
+First, "add the appropriate package repository for your distribution":{{ site.baseurl }}/install/install-manual-prerequisites.html#repos.
+
+On Debian-based systems:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install perl python-virtualenv fuse python-arvados-python-client python-arvados-fuse crunchstat arvados-docker-cleaner iptables ca-certificates</span>
+</code></pre>
+</notextile>
+
+On Red Hat-based systems:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo yum install perl python27-python-virtualenv fuse python27-python-arvados-python-client python27-python-arvados-fuse crunchstat arvados-docker-cleaner iptables ca-certificates</span>
+</code></pre>
+</notextile>
+
+{% include 'note_python27_sc' %}
+
+h2. Install Docker
+
+Compute nodes must have Docker installed to run jobs inside containers.  This requires a relatively recent version of Linux (at least upstream version 3.10, or a distribution version with the appropriate patches backported).  Follow the "Docker Engine installation documentation":https://docs.docker.com/ for your distribution.
+
+For Debian-based systems, the Arvados package repository includes a backported @docker.io@ package with a known-good version you can install.
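+
+For example, assuming you have already added the Arvados package repository:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install docker.io</span>
+</code></pre>
+</notextile>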
+
+h2. Configure Docker
+
+Crunch runs jobs in Docker containers with relatively little configuration.  You may need to start the Docker daemon with specific options to make sure these jobs run smoothly in your environment.  This section highlights options that are useful to most installations.  Refer to the "Docker daemon reference":https://docs.docker.com/reference/commandline/daemon/ for complete information about all available options.
+
+The best way to configure these options varies by distribution.
+
+* If you're using our backported @docker.io@ package, you can list these options in the @DOCKER_OPTS@ setting in @/etc/default/docker.io@.
+* If you're using another Debian-based package, you can list these options in the @DOCKER_OPTS@ setting in @/etc/default/docker@.
+* On Red Hat-based distributions, you can list these options in the @other_args@ setting in @/etc/sysconfig/docker@.
+
+h3. Default ulimits
+
+Docker containers inherit ulimits from the Docker daemon.  However, the ulimits for a single Unix daemon may not accommodate a long-running Crunch job.  You may want to increase default limits for compute jobs by passing @--default-ulimit@ options to the Docker daemon.  For example, to allow jobs to open 10,000 files, set @--default-ulimit nofile=10000:10000@.
+
+h3. DNS
+
+Your containers must be able to resolve the hostname in the ARVADOS_API_HOST environment variable (provided by the Crunch dispatcher) and any hostnames returned in Keep service records.  If these names are not in public DNS records, you may need to set a DNS resolver for the containers by specifying the @--dns@ address with the IP address of an appropriate nameserver.  You may specify this option more than once to use multiple nameservers.
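+
+For example, on a Debian-based system using our backported @docker.io@ package, @/etc/default/docker.io@ might combine the options from the two sections above like this (the nameserver address is a placeholder):
+
+<notextile>
+<pre><code>DOCKER_OPTS="--default-ulimit nofile=10000:10000 --dns <span class="userinput">10.20.30.40</span>"
+</code></pre>
+</notextile>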
+
+h2. Set up SLURM
+
+Install SLURM following "the same process you used to install the Crunch dispatcher":install-crunch-dispatch.html#slurm.
+
+h2. Copy configuration files from the dispatcher (API server)
+
+The @/etc/slurm-llnl/slurm.conf@ and @/etc/munge/munge.key@ files must be identical across the dispatcher and all compute nodes. Copy the files you created in the "Install the Crunch dispatcher":install-crunch-dispatch.html step to this compute node.
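+
+One way to do this, assuming you have root SSH access from the dispatcher to the compute node (the hostname @compute0@ here is hypothetical):
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo scp -p /etc/slurm-llnl/slurm.conf <b>root@compute0</b>:/etc/slurm-llnl/</span>
+~$ <span class="userinput">sudo scp -p /etc/munge/munge.key <b>root@compute0</b>:/etc/munge/</span>
+</code></pre>
+</notextile>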
+
+h2. Configure FUSE
+
+Install this file as @/etc/fuse.conf@:
+
+<notextile>
+<pre>
+# Set the maximum number of FUSE mounts allowed to non-root users.
+# The default is 1000.
+#
+#mount_max = 1000
+
+# Allow non-root users to specify the 'allow_other' or 'allow_root'
+# mount options.
+#
+user_allow_other
+</pre>
+</notextile>
+
+h2. Configure the Docker cleaner
+
+The arvados-docker-cleaner program removes the least recently used Docker images as needed to keep disk usage below a configured limit.
+
+On Debian-based systems, install runit:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install runit</span>
+</code></pre>
+</notextile>
+
+On Red Hat-based systems, "install runit from source":http://smarden.org/runit/install.html or use an alternative daemon supervisor.
+
+Configure runit to run the image cleaner using a suitable quota for your compute nodes and workload:
+
+<notextile>
+<pre><code>~$ <span class="userinput">cd /etc/sv</span>
+/etc/sv$ <span class="userinput">sudo mkdir arvados-docker-cleaner; cd arvados-docker-cleaner</span>
+/etc/sv/arvados-docker-cleaner$ <span class="userinput">sudo mkdir log log/main</span>
+/etc/sv/arvados-docker-cleaner$ <span class="userinput">sudo sh -c 'cat &gt;log/run' &lt;&lt;'EOF'
+#!/bin/sh
+exec svlogd -tt main
+EOF</span>
+/etc/sv/arvados-docker-cleaner$ <span class="userinput">sudo sh -c 'cat &gt;run' &lt;&lt;'EOF'
+#!/bin/sh
+exec python3 -m arvados_docker.cleaner --quota <b>50G</b>
+EOF</span>
+/etc/sv/arvados-docker-cleaner$ <span class="userinput">sudo chmod +x run log/run</span>
+</code></pre>
+</notextile>
+
+If you are using a different daemon supervisor, or if you want to test the daemon in a terminal window, an equivalent shell command to run arvados-docker-cleaner is:
+
+<notextile>
+<pre><code><span class="userinput">python3 -m arvados_docker.cleaner --quota <b>50G</b></span>
+</code></pre>
+</notextile>
+
+h2. Add a Crunch user account
+
+Create a Crunch user account, and add it to the @fuse@ and @docker@ groups so it can use those tools:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo useradd --groups fuse,docker crunch</span>
+</code></pre>
+</notextile>
+
+The crunch user should have the same UID, GID, and home directory across all compute nodes and the dispatcher (API server).
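+
+You can verify this by comparing @id@ output on each host (the numeric values below are hypothetical):
+
+<notextile>
+<pre><code>~$ <span class="userinput">id crunch</span>
+uid=1001(crunch) gid=1001(crunch) groups=1001(crunch),104(fuse),112(docker)
+</code></pre>
+</notextile>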
+
+h2. Tell the API server about this compute node
+
+Load your API superuser token on the compute node:
+
+<notextile>
+<pre><code>
+~$ <span class="userinput">HISTIGNORE=$HISTIGNORE:'export ARVADOS_API_TOKEN=*'</span>
+~$ <span class="userinput">export ARVADOS_API_TOKEN=@your-superuser-token@</span>
+~$ <span class="userinput">export ARVADOS_API_HOST=@uuid_prefix.your.domain@</span>
+~$ <span class="userinput">unset ARVADOS_API_HOST_INSECURE</span>
+</code>
+</pre>
+</notextile>
+
+Then execute this script to create a compute node object, and set up a cron job to have the compute node ping the API server every five minutes:
+
+<notextile>
+<pre><code>
+#!/bin/bash
+set -e
+if ! test -f /root/node.json ; then
+    python - &lt;&lt;EOF
+import arvados, json, socket
+fqdn = socket.getfqdn()
+hostname, _, domain = fqdn.partition('.')
+ip_address = socket.gethostbyname(fqdn)
+node = arvados.api('v1').nodes().create(body={'hostname': hostname, 'domain': domain, 'ip_address': ip_address}).execute()
+with open('/root/node.json', 'w') as node_file:
+    json.dump(node, node_file, indent=2)
+EOF
+
+    # Make sure /dev/fuse permissions are correct (the device appears after fuse is loaded)
+    chmod 1660 /dev/fuse && chgrp fuse /dev/fuse
+fi
+
+UUID=`grep \"uuid\" /root/node.json  |cut -f4 -d\"`
+PING_SECRET=`grep \"ping_secret\" /root/node.json  |cut -f4 -d\"`
+
+if ! test -f /etc/cron.d/node_ping ; then
+    echo "*/5 * * * * root /usr/bin/curl -k -d ping_secret=$PING_SECRET https://$ARVADOS_API_HOST/arvados/v1/nodes/$UUID/ping" > /etc/cron.d/node_ping
+fi
+
+/usr/bin/curl -k -d ping_secret=$PING_SECRET https://$ARVADOS_API_HOST/arvados/v1/nodes/$UUID/ping?ping_secret=$PING_SECRET
+</code>
+</pre>
+</notextile>
+
+And remove your token from the environment:
+
+<notextile>
+<pre><code>
+~$ <span class="userinput">unset ARVADOS_API_TOKEN</span>
+~$ <span class="userinput">unset ARVADOS_API_HOST</span>
+</code>
+</pre>
+</notextile>
+
index 231d1f45e854956789a95a878167e2cb87ecef17..370a6e7c3d04fce578ccc1c231e82ec0e44960f6 100644 (file)
@@ -5,84 +5,187 @@ title: Install the Crunch dispatcher
 
 ...
 
+The dispatcher normally runs on the same host/VM as the API server.
 
+h2. Test the Arvados job queue
 
-The dispatcher normally runs on the same host/VM as the API server.
+Crunch dispatches work from the job queue on the Arvados API server.  Before you start installing the Crunch dispatcher, now's a good time to check that the API server and Git server can coordinate to create job records.  Run these commands *on your shell server* to create a collection, and a job to calculate the MD5 checksum of every file in it:
+
+<notextile>
+<pre><code>~$ <span class="userinput">echo 'Hello, Crunch!' | arv-put --portable-data-hash -</span>
+&hellip;
+d40c7f35d80da669afb9db1896e760ad+49
+~$ <span class="userinput">read -rd $'\000' newjob &lt;&lt;EOF; arv job create --job "$newjob"
+{"script_parameters":{"input":"d40c7f35d80da669afb9db1896e760ad+49"},
+ "script_version":"0988acb472849dc0",
+ "script":"hash",
+ "repository":"arvados"}
+EOF</span>
+</code></pre>
+</notextile>
+
+If you get the error
+
+<pre>
+ArgumentError: Specified script_version does not resolve to a commit
+</pre>
 
-h4. Perl SDK dependencies
+it often means that the API server can't read the specified repository&mdash;either because it doesn't exist, or because the user running the API server doesn't have permission to read the repository files.  Check the API server's log (@/var/www/arvados-api/current/log/production.log@) for details, and double-check the instructions in the "Git server installation guide":install-arv-git-httpd.html.
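+
+For example, to inspect the most recent entries:
+
+<notextile>
+<pre><code>~$ <span class="userinput">tail -n 50 /var/www/arvados-api/current/log/production.log</span>
+</code></pre>
+</notextile>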
+
+If everything goes well, the API server should create a job record, and your @arv@ command will output the JSON for that record.  It should have state @Queued@ and script_version @0988acb472849dc08d576ee40493e70bde2132ca@.  If the job JSON includes those fields, you can proceed to install the Crunch dispatcher and a compute node.  This job will remain queued until you install those services.
+
+h2. Perl SDK dependencies
 
 Install the Perl SDK on the controller.
 
 * See "Perl SDK":{{site.baseurl}}/sdk/perl/index.html page for details.
 
-h4. Python SDK dependencies
+h2. Python SDK dependencies
 
 Install the Python SDK and CLI tools on controller and all compute nodes.
 
 * See "Python SDK":{{site.baseurl}}/sdk/python/sdk-python.html page for details.
 
-h4. Likely crunch job dependencies
+h2(#slurm). Set up SLURM
 
-On compute nodes:
+On the API server, install SLURM and munge, and generate a munge key.
 
-* @pip install --upgrade pyvcf@
+On Debian-based systems:
 
-h4. Crunch user account
+<notextile>
+<pre><code>~$ <span class="userinput">sudo /usr/bin/apt-get install slurm-llnl munge</span>
+~$ <span class="userinput">sudo /usr/sbin/create-munge-key</span>
+</code></pre>
+</notextile>
 
-On compute nodes and controller:
+On Red Hat-based systems, "install SLURM and munge from source following their installation guide":https://computing.llnl.gov/linux/slurm/quickstart_admin.html.
 
-* @adduser crunch@
+Now we need to give SLURM a configuration file in @/etc/slurm-llnl/slurm.conf@. Here's an example:
 
-The crunch user should have the same UID, GID, and home directory on all compute nodes and on the controller.
+<notextile>
+<pre>
+ControlMachine=uuid_prefix.your.domain
+SlurmctldPort=6817
+SlurmdPort=6818
+AuthType=auth/munge
+StateSaveLocation=/tmp
+SlurmdSpoolDir=/tmp/slurmd
+SwitchType=switch/none
+MpiDefault=none
+SlurmctldPidFile=/var/run/slurmctld.pid
+SlurmdPidFile=/var/run/slurmd.pid
+ProctrackType=proctrack/pgid
+CacheGroups=0
+ReturnToService=2
+TaskPlugin=task/affinity
+#
+# TIMERS
+SlurmctldTimeout=300
+SlurmdTimeout=300
+InactiveLimit=0
+MinJobAge=300
+KillWait=30
+Waittime=0
+#
+# SCHEDULING
+SchedulerType=sched/backfill
+SchedulerPort=7321
+SelectType=select/cons_res
+SelectTypeParameters=CR_CPU_Memory
+FastSchedule=1
+#
+# LOGGING
+SlurmctldDebug=3
+#SlurmctldLogFile=
+SlurmdDebug=3
+#SlurmdLogFile=
+JobCompType=jobcomp/none
+#JobCompLoc=
+JobAcctGatherType=jobacct_gather/none
+#
+# COMPUTE NODES
+NodeName=DEFAULT
+PartitionName=DEFAULT MaxTime=INFINITE State=UP
+
+NodeName=compute[0-255]
+PartitionName=compute Nodes=compute[0-255] Default=YES Shared=YES
+</pre>
+</notextile>
 
-h4. Repositories
+h3. SLURM configuration essentials
 
-Crunch scripts must be in Git repositories in the directory configured as @git_repositories_dir@/*.git (see the "API server installation":install-api-server.html#git_repositories_dir).
+Whenever you change this file, you will need to update the copy _on every compute node_ as well as the controller node, and then run @sudo scontrol reconfigure@.
 
-Once you have a repository with commits -- and you have read access to the repository -- you should be able to create a new job:
+*@ControlMachine@* should be a DNS name that resolves to the SLURM controller (dispatch/API server). This must resolve correctly on all SLURM worker nodes as well as the controller itself. In general SLURM is very sensitive about all of the nodes being able to communicate with the controller _and one another_, all using the same DNS names.
 
-<pre>
-read -rd $'\000' newjob <<EOF; arv job create --job "$newjob"
-{"script_parameters":{"input":"f815ec01d5d2f11cb12874ab2ed50daa"},
- "script_version":"master",
- "script":"hash",
- "repository":"arvados"}
-EOF
-</pre>
+*@NodeName=compute[0-255]@* establishes that the hostnames of the worker nodes will be compute0, compute1, etc. through compute255.
+* There are several ways to compress sequences of names, like @compute[0-9,80,100-110]@. See the "hostlist" discussion in the @slurm.conf(5)@ and @scontrol(1)@ man pages for more information.
+* It is not necessary for all of the nodes listed here to be alive in order for SLURM to work, although you should make sure the DNS entries exist. It is easiest to define lots of hostnames up front, assigning them to real nodes and updating your DNS records as the nodes appear. This minimizes the frequency of @slurm.conf@ updates and use of @scontrol reconfigure@.
 
-Without getting this error:
+Each hostname in @slurm.conf@ must also resolve correctly on all SLURM worker nodes as well as the controller itself. Furthermore, the hostnames used in the configuration file must match the hostnames reported by @hostname@ or @hostname -s@ on the nodes themselves. This applies to the ControlMachine as well as the worker nodes.
 
-<pre>
-ArgumentError: Specified script_version does not resolve to a commit
-</pre>
+For example:
+* In @/etc/slurm-llnl/slurm.conf@ on control and worker nodes: @ControlMachine=uuid_prefix.your.domain@
+* In @/etc/slurm-llnl/slurm.conf@ on control and worker nodes: @NodeName=compute[0-255]@
+* In @/etc/resolv.conf@ on control and worker nodes: @search uuid_prefix.your.domain@
+* On the control node: @hostname@ reports @uuid_prefix.your.domain@
+* On worker node 123: @hostname@ reports @compute123.uuid_prefix.your.domain@
 
-h4. Running jobs
+h3. Automatic hostname assignment
 
-* @services/api/script/crunch-dispatch.rb@ must be running.
-* @crunch-dispatch.rb@ needs @services/crunch/crunch-job@ in its @PATH@.
-* @crunch-job@ needs @sdk/perl/lib@ and @warehouse-apps/libwarehouse-perl/lib@ in its @PERLLIB@
-* @crunch-job@ needs @ARVADOS_API_HOST@ (and, if necessary in a development environment, @ARVADOS_API_HOST_INSECURE@)
+If your worker node bootstrapping script (see "Installing a compute node":install-compute-node.html) does not send the worker's current hostname, the API server will choose an unused hostname from the set given in @application.yml@, which defaults to @compute[0-255]@.
 
-Example @/var/service/arvados_crunch_dispatch/run@ script:
+If it is not feasible to give your compute nodes hostnames like compute0, compute1, etc., you can accommodate other naming schemes with a bit of extra configuration.
 
-<pre>
-#!/bin/sh
+If you want Arvados to assign names to your nodes with a different consecutive numeric series like @{worker1-0000, worker1-0001, worker1-0002}@, add an entry to @application.yml@; see @/var/www/arvados-api/current/config/application.default.yml@ for details. Example:
+* In @application.yml@: <code>assign_node_hostname: worker1-%<slot_number>04d</code>
+* In @slurm.conf@: <code>NodeName=worker1-[0000-0255]</code>
+
+If your worker hostnames are already assigned by other means, and the full set of names is known in advance, have your worker node bootstrapping script (see "Installing a compute node":install-compute-node.html) send its current hostname, rather than expecting Arvados to assign one.
+* In @application.yml@: <code>assign_node_hostname: false</code>
+* In @slurm.conf@: <code>NodeName=alice,bob,clay,darlene</code>
+
+If your worker hostnames are already assigned by other means, but the full set of names is _not_ known in advance, you can use the @slurm.conf@ and @application.yml@ settings in the previous example, but you must also update @slurm.conf@ (both on the controller and on all worker nodes) and run @sudo scontrol reconfigure@ whenever a new node comes online.
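+
+For example, when a new node named @eve@ comes online, you might run something like this on the controller (a sketch: the node names match the previous example, and the copy step assumes root SSH access to the workers; a configuration management tool works just as well):
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo sed -i 's/NodeName=alice,bob,clay,darlene/NodeName=alice,bob,clay,darlene,eve/' /etc/slurm-llnl/slurm.conf</span>
+~$ <span class="userinput">for node in alice bob clay darlene eve; do scp /etc/slurm-llnl/slurm.conf root@$node:/etc/slurm-llnl/; done</span>
+~$ <span class="userinput">sudo scontrol reconfigure</span>
+</code></pre>
+</notextile>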
+
+h2. Enable SLURM job dispatch
+
+In your API server's @application.yml@ configuration file, add the line @crunch_job_wrapper: :slurm_immediate@ under the appropriate section.  (The second colon is not a typo.  It denotes a Ruby symbol.)
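+
+For example, if you are running in the production environment, the relevant part of @application.yml@ might read (the section name is an assumption; use whichever environment section applies to your deployment):
+
+<notextile>
+<pre><code>production:
+  crunch_job_wrapper: :slurm_immediate
+</code></pre>
+</notextile>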
+
+h2. Crunch user account
+
+Run @sudo adduser crunch@.  The crunch user should have the same UID, GID, and home directory on all compute nodes and on the dispatcher (API server).
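+
+One way to keep the UID and GID consistent across nodes is to specify them explicitly when creating the account on each machine (the IDs below are arbitrary examples; pick values that are unused on all of your nodes):
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo groupadd --gid 4005 crunch</span>
+~$ <span class="userinput">sudo adduser --uid 4005 --gid 4005 crunch</span>
+</code></pre>
+</notextile>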
+
+h2. Run the Crunch dispatcher service
+
+To dispatch Arvados jobs:
+
+* The API server script @crunch-dispatch.rb@ must be running.
+* @crunch-job@ needs the installation path of the Perl SDK in its @PERLLIB@.
+* @crunch-job@ needs the @ARVADOS_API_HOST@ (and, if necessary, @ARVADOS_API_HOST_INSECURE@) environment variable set.
+
+We recommend you run @crunch-dispatch.rb@ under "runit":http://smarden.org/runit/ or a similar supervisor.  Here's an example runit service script:
+
+<notextile>
+<pre><code>#!/bin/sh
 set -e
 
 rvmexec=""
-## uncomment this line if you use rvm:
-#rvmexec="/usr/local/rvm/bin/rvm-exec 2.1.1"
+## Uncomment this line if you use RVM:
+#rvmexec="/usr/local/rvm/bin/rvm-exec default"
 
-export PATH="$PATH":/path/to/arvados/services/crunch
-export ARVADOS_API_HOST={{ site.arvados_api_host }}
+export ARVADOS_API_HOST=<span class="userinput">uuid_prefix.your.domain</span>
 export CRUNCH_DISPATCH_LOCKFILE=/var/lock/crunch-dispatch
+export RAILS_ENV=production
 
-fuser -TERM -k $CRUNCH_DISPATCH_LOCKFILE || true
+## Uncomment this line if your cluster uses self-signed SSL certificates:
+#export ARVADOS_API_HOST_INSECURE=yes
 
-## Only if your SSL cert is unverifiable:
-# export ARVADOS_API_HOST_INSECURE=yes
+# This is the path to docker on your compute nodes. You might need to
+# change it to "docker", "/opt/bin/docker", etc.
+export CRUNCH_JOB_DOCKER_BIN=<span class="userinput">docker.io</span>
 
-cd /path/to/arvados/services/api
-export RAILS_ENV=production
+fuser -TERM -k $CRUNCH_DISPATCH_LOCKFILE || true
+cd /var/www/arvados-api/services/api
 exec $rvmexec bundle exec ./script/crunch-dispatch.rb 2>&1
-</pre>
+</code></pre>
+</notextile>
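+
+To install this as a runit service, save the script as the service's @run@ file and enable it, for example like this (a sketch assuming runit's conventional @/etc/sv@ and @/etc/service@ layout; the service name is arbitrary):
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo mkdir -p /etc/sv/arvados-crunch-dispatch</span>
+~$ <span class="userinput">sudo install -m 0755 run /etc/sv/arvados-crunch-dispatch/run</span>
+~$ <span class="userinput">sudo ln -s /etc/sv/arvados-crunch-dispatch /etc/service/</span>
+</code></pre>
+</notextile>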
index e84b59203e95d0af11973c544a1283e6c268c8eb..1566e80a9cc4a7f6519793f101cc5662177e6f40 100644 (file)
@@ -1,16 +1,21 @@
 ---
 layout: default
 navsection: installguide
-title: Build and install docker images
+title: Build and install Docker images
 ...
 
-This installation method is appropriate for local testing, evaluation, and development. For production use, this method is not recommended.
+This method is intended for evaluation and development on a local workstation. It is not suitable for production use in a cluster deployment.
 
 h2. Prerequisites
 
 # A GNU/Linux (virtual) machine
 # A working Docker installation (see "Installing Docker":https://docs.docker.com/installation/)
 # A working Go installation (see "Install the Go tools":https://golang.org/doc/install)
+# A working Ruby installation, with the Bundler gem installed
+
+h3. Install Ruby and Bundler
+
+{% include 'install_ruby_and_bundler' %}
 
 h2. Download the source tree
 
@@ -50,7 +55,7 @@ Successfully built d6cbb5002604
 date >shell-image
 </code></pre></notextile>
 
-If all goes well, you should now have a number of docker images built:
+If all goes well, you should now have a number of Docker images built (your image IDs will differ):
 
 <notextile>
 <pre><code>~$ <span class="userinput">docker.io images</span>
@@ -58,6 +63,7 @@ REPOSITORY              TAG                 IMAGE ID            CREATED
 arvados/shell           latest              d6cbb5002604        10 minutes ago      1.613 GB
 arvados/sso             latest              377f1fa0108e        11 minutes ago      1.807 GB
 arvados/keep            latest              ade0e0d2dd00        12 minutes ago      210.8 MB
+arvados/keepproxy       latest              b39ef0baba02        12 minutes ago      241.6 MB
 arvados/workbench       latest              b0e4fb6da385        12 minutes ago      2.218 GB
 arvados/doc             latest              4b64daec9454        12 minutes ago      1.524 GB
 arvados/compute         latest              7f1f5f7faf54        13 minutes ago      1.862 GB
@@ -119,7 +125,13 @@ The @arvdock@ command can be used to start and stop the docker containers. It ha
 <pre><code>
 ~$ <span class="userinput">./arvdock -h</span>
 
-usage: ./arvdock (start|stop|restart|test) [options]
+usage: ./arvdock (start|stop|restart|reset|test) [options]
+
+start    run new or restart stopped arvados containers
+stop     stop arvados containers
+restart  stop and then start arvados containers
+reset    stop and delete containers WARNING: this will delete the data inside Arvados!
+test     run tests
 
 ./arvdock start/stop/restart options:
   -d[port], --doc[=port]        Documentation server (default port 9898)
@@ -130,7 +142,7 @@ usage: ./arvdock (start|stop|restart|test) [options]
   -v, --vm                      Shell server
   -n, --nameserver              Nameserver
   -k, --keep                    Keep servers
-  --ssh                         Enable SSH access to server containers
+  -p, --keepproxy               Keepproxy server
   -h, --help                    Display this help and exit
 
   If no options are given, the action is applied to all servers.
@@ -141,40 +153,78 @@ usage: ./arvdock (start|stop|restart|test) [options]
 </pre>
 </notextile>
 
-The @--ssh@ option can be useful to debug issues with the Docker containers; it allows you to ssh into the running containers as the @root@ user, provided you have access to the private key that matches the public key specified in @config.yml@'s PUBLIC_KEY_PATH variable.
+To debug issues with the Docker containers, use @docker exec@. For example:
+
+<notextile>
+<pre><code>
+~$ <span class="userinput">docker exec -t -i api_server /bin/bash</span>
+</code></pre></notextile>
 
-Start the docker containers:
+To start the docker containers:
 
 <notextile>
 <pre><code>
 ~$ <span class="userinput">./arvdock start</span>
-sso_server
-Starting container:
-  /usr/bin/docker.io run -d -i -t -p 9901:443 --name sso_server arvados/sso
-api_server
-Starting container:
-  /usr/bin/docker.io run -d -i -t -p 9900:443 --name api_server --link sso_server:sso arvados/api
-keep_server_0
-Starting container:
-  /usr/bin/docker.io run -d -i -t -p 25107:25107 --name keep_server_0 -v /tmp/tmp.aCSx8Pq6Wb:/dev/keep-0 --link api_server:api arvados/keep
-keep_server_1
-Starting container:
-  /usr/bin/docker.io run -d -i -t -p 25108:25107 --name keep_server_1 -v /tmp/tmp.m4OQ9WB73G:/dev/keep-0 --link api_server:api arvados/keep
-doc_server
-Starting container:
-  /usr/bin/docker.io run -d -i -t -p 9898:80 --name doc_server arvados/doc
+start_api=false
+start_compute=false
+start_doc=false
+start_keep=false
+start_keepproxy=false
+start_nameserver=false
+start_sso=false
+start_vm=false
+start_workbench=false
+Starting crosbymichael/skydns container...
+skydns
+/usr/bin/docker.io run -d -p 172.17.42.1:53:53/udp --name skydns crosbymichael/skydns -nameserver 8.8.8.8:53 -domain arvados
+89922ec786cbd2098ed6bae205468a675657cdb2db0e0bfdfe76d1e43cb2fe35
+Starting crosbymichael/skydock container...
+skydock
+/usr/bin/docker.io run -d -v /var/run/docker.sock:/docker.sock --name skydock crosbymichael/skydock -ttl 30 -environment dev -s /docker.sock -domain arvados -name skydns
+de6a35bb20fb9063fb97218ba2554974546eed969d43b1f5aa31a1ac9576e802
+Starting container: api_server
+  /usr/bin/docker.io start api_server
+Started container: api_server
+Starting container: compute0
+  /usr/bin/docker.io start compute0
+Started container: compute0
+Starting container: compute1
+  /usr/bin/docker.io start compute1
+Started container: compute1
+Starting container: keep_server_0
+  /usr/bin/docker.io start keep_server_0
+Started container: keep_server_0
+Starting container: keep_server_1
+  /usr/bin/docker.io start keep_server_1
+Started container: keep_server_1
+Starting container: keepproxy_server
+  /usr/bin/docker.io start keepproxy_server
+Started container: keepproxy_server
+Starting container: doc_server
+  /usr/bin/docker.io start doc_server
+Started container: doc_server
+
+******************************************************************
+You can access the Arvados documentation at http://doc.dev.arvados
+******************************************************************
+
+Starting container: shell
+  /usr/bin/docker.io start shell
+Started container: shell
+Starting container: workbench_server
+  /usr/bin/docker.io start workbench_server
+Started container: workbench_server
+
+********************************************************************
+You can access the Arvados workbench at http://workbench.dev.arvados
+********************************************************************
 
 *****************************************************************
-You can access the Arvados documentation at http://localhost:9898
-*****************************************************************
-
-workbench_server
-Starting container:
-  /usr/bin/docker.io run -d -i -t -p 9899:80 --name workbench_server --link api_server:api arvados/workbench
+To access Arvados you must add the Arvados nameserver to the top
+of your DNS configuration in /etc/resolv.conf:
+nameserver 172.17.42.1
+******************************************************************
 
-*****************************************************************
-You can access the Arvados workbench at http://localhost:9899
-*****************************************************************
 </code></pre></notextile>
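+
+One way to add that line (a sketch; this assumes @/etc/resolv.conf@ is not managed by a tool such as @resolvconf@, which would overwrite manual edits):
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo sed -i '1i nameserver 172.17.42.1' /etc/resolv.conf</span>
+</code></pre>
+</notextile>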
 
 h2. Accessing workbench
@@ -182,12 +232,10 @@ h2. Accessing workbench
 Point your browser to the Dockerized workbench:
 
 <notextile>
-<pre><code><span class="userinput">https://localhost:9899</span>
+<pre><code><span class="userinput">http://workbench.dev.arvados</span>
 </code></pre>
 </notextile>
 
 Now use the Google account you specified as @API_AUTO_ADMIN_USER@ in @config.yml@ to log in.
 
 Your browser will warn you that you are accessing a site with an untrusted SSL certificate. This is normal; by default the Arvados Docker installation uses self-signed SSL certificates for the SSO and API servers. If you use a local SSO server in a Docker container, you will be prompted *twice*. The default is to use the Curoverse SSO server.
-
-
index 9e8f8787825490ce5c7eab1fdb6427f8f564df6e..6a531a37848d2c2eaa5af34e20fb63c32351f4bf 100644 (file)
@@ -4,8 +4,6 @@ navsection: installguide
 title: Install Keepproxy server
 ...
 
-This installation guide assumes you are on a 64 bit Debian or Ubuntu system.
-
 The Keepproxy server is a gateway into your Keep storage. Unlike the Keepstore servers, which are only accessible on the local LAN, Keepproxy is designed to provide secure access into Keep from anywhere on the internet.
 
 By convention, we use the following hostname for the Keepproxy:
@@ -20,13 +18,17 @@ This hostname should resolve from anywhere on the internet.
 
 h2. Install Keepproxy
 
-First add the Arvados apt repository, and then install the Keepproxy package.
+On Debian-based systems:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install keepproxy</span>
+</code></pre>
+</notextile>
+
+On Red Hat-based systems:
 
 <notextile>
-<pre><code>~$ <span class="userinput">echo "deb http://apt.arvados.org/ wheezy main" | sudo tee /etc/apt/sources.list.d/apt.arvados.org.list</span>
-~$ <span class="userinput">sudo /usr/bin/apt-key adv --keyserver pool.sks-keyservers.net --recv 1078ECD7</span>
-~$ <span class="userinput">sudo /usr/bin/apt-get update</span>
-~$ <span class="userinput">sudo /usr/bin/apt-get install keepproxy</span>
+<pre><code>~$ <span class="userinput">sudo yum install keepproxy</span>
 </code></pre>
 </notextile>
 
@@ -43,8 +45,6 @@ Usage of default:
 </code></pre>
 </notextile>
 
-It's recommended to run Keepproxy under "runit":https://packages.debian.org/search?keywords=runit or something similar.
-
 h3. Create an API token for the Keepproxy server
 
 The Keepproxy server needs a token to talk to the API server.
@@ -52,11 +52,18 @@ The Keepproxy server needs a token to talk to the API server.
 On the <strong>API server</strong>, use the following command to create the token:
 
 <notextile>
-<pre><code>~/arvados/services/api/script$ <span class="userinput">RAILS_ENV=production ./get_anonymous_user_token.rb</span>
+<pre><code>~/arvados/services/api/script$ <span class="userinput">RAILS_ENV=production bundle exec ./get_anonymous_user_token.rb</span>
 hoShoomoo2bai3Ju1xahg6aeng1siquuaZ1yae2gi2Uhaeng2r
 </code></pre></notextile>
 
-The value for the @api_token@ field should be added to Keepproxy's environment as ARVADOS_API_TOKEN. Make sure to also set ARVADOS_API_HOST to @uuid_prefix@.your.domain.
+h3. Set up the Keepproxy service
+
+We recommend you run Keepproxy under "runit":http://smarden.org/runit/ or a similar supervisor.  Make sure the launcher sets the environment variables @ARVADOS_API_TOKEN@ (with the token you just generated), @ARVADOS_API_HOST@, and, if needed, @ARVADOS_API_HOST_INSECURE@.  The core keepproxy command to run is:
+
+<notextile>
+<pre><code>ARVADOS_API_TOKEN=<span class="userinput">[generated token]</span> ARVADOS_API_HOST=<span class="userinput">uuid_prefix.your.domain</span> exec keepproxy
+</code></pre>
+</notextile>
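+
+A corresponding runit @run@ script might look like this (a sketch; the token file location is an arbitrary example):
+
+<notextile>
+<pre><code>#!/bin/sh
+set -e
+export ARVADOS_API_HOST=<span class="userinput">uuid_prefix.your.domain</span>
+export ARVADOS_API_TOKEN=$(cat /etc/keepproxy/api-token)
+## Uncomment this line if your cluster uses self-signed SSL certificates:
+#export ARVADOS_API_HOST_INSECURE=yes
+exec keepproxy 2>&1
+</code></pre>
+</notextile>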
 
 h3. Set up a reverse proxy with SSL support
 
@@ -64,35 +71,47 @@ Because the Keepproxy is intended for access from anywhere on the internet, it i
 
 This is best achieved by putting a reverse proxy with SSL support in front of Keepproxy. Keepproxy itself runs on port 25107 by default; your reverse proxy can run on port 443 and pass requests to Keepproxy on port 25107.
 
-If possible, the proxy should be configured to add CORS headers to its own error responses -- otherwise in-browser applications can't report proxy errors. For example, in nginx >= 1.7.5:
-
 <notextile><pre>
+upstream keepproxy {
+  server                127.0.0.1:<span class="userinput">25107</span>;
+}
+
 server {
-  server_name keep.example.com
-  ...
-  add_header 'Access-Control-Allow-Methods' 'GET, HEAD, POST, PUT, OPTIONS' always
-  add_header 'Access-Control-Allow-Origin' '*' always
-  add_header 'Access-Control-Allow-Headers' 'Authorization, Content-Length, Content-Type, X-Keep-Desired-Replicas' always
-  add_header 'Access-Control-Max-Age' '86486400' always
+  listen                <span class="userinput">[your public IP address]</span>:443 ssl;
+  server_name           keep.<span class="userinput">uuid_prefix</span>.your.domain;
+
+  proxy_connect_timeout 90s;
+  proxy_read_timeout    300s;
+  proxy_set_header      X-Real-IP $remote_addr;
+
+  ssl                   on;
+  ssl_certificate       /etc/nginx/keep.<span class="userinput">uuid_prefix</span>.your.domain-ssl.crt;
+  ssl_certificate_key   /etc/nginx/keep.<span class="userinput">uuid_prefix</span>.your.domain-ssl.key;
+
+  # Clients need to be able to upload blocks of data up to 64MiB in size.
+  client_max_body_size  64m;
+
+  location / {
+    proxy_pass          http://keepproxy;
+  }
 }
 </pre></notextile>
 
-*Warning:* Make sure you don't inadvertently add CORS headers for services _other than keepproxy_ while you're doing this.
+Note: if the Web uploader is failing to upload data and there are no logs from keepproxy, be sure to check the nginx proxy logs.  In addition to "GET" and "PUT", the nginx proxy must pass "OPTIONS" requests to keepproxy, which should respond with appropriate Cross-Origin Resource Sharing (CORS) headers.  If the CORS headers are not present, browser security policy will cause the upload request to silently fail.  The CORS headers are generated by keepproxy and should not be set in nginx.
 
 h3. Tell the API server about the Keepproxy server
 
 The API server needs to be informed about the presence of your Keepproxy server. Please execute the following commands on your <strong>shell server</strong>.
 
 <notextile>
-<pre><code>~$ <span class="userinput">prefix=`arv --format=uuid user current | cut -d- -f1`</span>
-~$ <span class="userinput">echo "Site prefix is '$prefix'"</span>
+<pre><code>~$ <span class="userinput">uuid_prefix=`arv --format=uuid user current | cut -d- -f1`</span>
+~$ <span class="userinput">echo "Site prefix is '$uuid_prefix'"</span>
 ~$ <span class="userinput">read -rd $'\000' keepservice &lt;&lt;EOF; arv keep_service create --keep-service "$keepservice"</span>
 <span class="userinput">{
- "service_host":"keep.$prefix.your.domain",
+ "service_host":"<strong>keep.$uuid_prefix.your.domain</strong>",
  "service_port":443,
  "service_ssl_flag":true,
  "service_type":"proxy"
 }
 EOF</span>
 </code></pre></notextile>
-
index 7fb810d841913c34603126229baae98af7cb0ddf..4cb46e13801e78a95b51c26cce0f3696cf8b3f78 100644 (file)
@@ -4,8 +4,6 @@ navsection: installguide
 title: Install Keepstore servers
 ...
 
-This installation guide assumes you are on a 64 bit Debian or Ubuntu system.
-
 We are going to install two Keepstore servers. By convention, we use the following hostname pattern:
 
 <div class="offset1">
@@ -19,13 +17,17 @@ Because the Keepstore servers are not directly accessible from the internet, the
 
 h2. Install Keepstore
 
-First add the Arvados apt repository, and then install the Keepstore package.
+On Debian-based systems:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install keepstore</span>
+</code></pre>
+</notextile>
+
+On Red Hat-based systems:
 
 <notextile>
-<pre><code>~$ <span class="userinput">echo "deb http://apt.arvados.org/ wheezy main" | sudo tee /etc/apt/sources.list.d/apt.arvados.org.list</span>
-~$ <span class="userinput">sudo /usr/bin/apt-key adv --keyserver pool.sks-keyservers.net --recv 1078ECD7</span>
-~$ <span class="userinput">sudo /usr/bin/apt-get update</span>
-~$ <span class="userinput">sudo /usr/bin/apt-get install keepstore</span>
+<pre><code>~$ <span class="userinput">sudo yum install keepstore</span>
 </code></pre>
 </notextile>
 
@@ -33,36 +35,40 @@ Verify that Keepstore is functional:
 
 <notextile>
 <pre><code>~$ <span class="userinput">keepstore -h</span>
-2014/10/29 14:23:38 Keep started: pid 6848
-Usage of keepstore:
+2015/05/08 13:41:16 keepstore starting, pid 2565
+Usage of ./keepstore:
+  -blob-signature-ttl=1209600: Lifetime of blob permission signatures. See services/api/config/application.default.yml.
+  -blob-signing-key-file="": File containing the secret key for generating and verifying blob permission signatures.
   -data-manager-token-file="": File with the API token used by the Data Manager. All DELETE requests or GET /index requests must carry this token.
   -enforce-permissions=false: Enforce permission signatures on requests.
-  -listen=":25107": Interface on which to listen for requests, in the format ipaddr:port. e.g. -listen=10.0.1.24:8000. Use -listen=:port to listen on all network interfaces.
+  -listen=":25107": Listening address, in the form "host:port". e.g., 10.0.1.24:8000. Omit the host part to listen on all interfaces.
+  -max-buffers=128: Maximum RAM to use for data buffers, given in multiples of block size (64 MiB). When this limit is reached, HTTP requests requiring buffers (like GET and PUT) will wait for buffer space to be released.
   -never-delete=false: If set, nothing will be deleted. HTTP 405 will be returned for valid DELETE requests.
-  -permission-key-file="": File containing the secret key for generating and verifying permission signatures.
-  -permission-ttl=1209600: Expiration time (in seconds) for newly generated permission signatures.
-  -pid="": Path to write pid file
-  -serialize=false: If set, all read and write operations on local Keep volumes will be serialized.
-  -volumes="": Comma-separated list of directories to use for Keep volumes, e.g. -volumes=/var/keep1,/var/keep2. If empty or not supplied, Keep will scan mounted filesystems for volumes with a /keep top-level directory.
+  -permission-key-file="": Synonym for -blob-signing-key-file.
+  -permission-ttl=0: Synonym for -blob-signature-ttl.
+  -pid="": Path to write pid file during startup. This file is kept open and locked with LOCK_EX until keepstore exits, so `fuser -k pidfile` is one way to shut down. Exit immediately if there is an error opening, locking, or writing the pid file.
+  -readonly=false: Do not write, delete, or touch anything on the following volumes.
+  -serialize=false: Serialize read and write operations on the following volumes.
+  -volume=[]: Local storage directory. Can be given more than once to add multiple directories. If none are supplied, the default is to use all directories named "keep" that exist in the top level directory of a mount point at startup time. Can be a comma-separated list, but this is deprecated: use multiple -volume arguments instead.
+  -volumes=[]: Deprecated synonym for -volume.
 </code></pre>
 </notextile>
 
-If you want access control on your Keepstore server(s), you should provide a permission key. The @-permission-key-file@ argument should contain the path to a file that contains a single line with a long random alphanumeric string. It should be the same as the @blob_signing_key@ that can be set in the "API server":install-api-server.html config/application.yml file.
+If you want access control on your Keepstore server(s), you must specify the @-enforce-permissions@ flag and provide a signing key. The @-blob-signing-key-file@ argument should be a file containing a long random alphanumeric string with no internal line breaks (it is also possible to use a socket or FIFO: keepstore reads it only once, at startup). This key must be the same as the @blob_signing_key@ configured in the "API server":install-api-server.html config/application.yml file.
+
+The @-max-buffers@ argument can be used to restrict keepstore's memory use. By default, keepstore will allocate no more than 128 blocks (8 GiB) worth of data buffers at a time. Normally this should be set as high as possible without risking swapping.
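+
+For example, you could generate a signing key and store it where keepstore can read it (the path is an arbitrary choice; make the file readable only by the user that runs keepstore):
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo mkdir -p /etc/keepstore</span>
+~$ <span class="userinput">sudo sh -c 'tr -dc 0-9a-zA-Z &lt;/dev/urandom | head -c50 &gt;/etc/keepstore/blob-signing-key'</span>
+~$ <span class="userinput">sudo chmod 0400 /etc/keepstore/blob-signing-key</span>
+</code></pre>
+</notextile>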
 
 Prepare one or more volumes for Keepstore to use. Simply create a @keep@ directory at the top level of each filesystem you would like Keepstore to use, and then start Keepstore. For example, with a @keep@ directory on a filesystem mounted at @/mnt@:
 
 <notextile>
-<pre><code>~$ <span class="userinput">keepstore</span>
-2014/10/29 11:41:37 Keep started: pid 20736
-2014/10/29 11:41:37 adding Keep volume: /tmp/tmp.vwSCtUCyeH/keep
-2014/10/29 11:41:37 adding Keep volume: /tmp/tmp.Lsn4w8N3Xv/keep
-2014/10/29 11:41:37 Running without a PermissionSecret. Block locators returned by this server will not be signed, and will be rejected by a server that enforces permissions.
-2014/10/29 11:41:37 To fix this, run Keep with --permission-key-file=<path> to define the location of a file containing the permission key.
-
+<pre><code>~$ <span class="userinput">keepstore -blob-signing-key-file=./blob-signing-key</span>
+2015/05/08 13:44:26 keepstore starting, pid 2765
+2015/05/08 13:44:26 Using volume [UnixVolume /mnt/keep] (writable=true)
+2015/05/08 13:44:26 listening at :25107
 </code></pre>
 </notextile>
 
-It's recommended to run Keepstore under "runit":https://packages.debian.org/search?keywords=runit or something similar.
+It's recommended to run Keepstore under "runit":http://smarden.org/runit/ or something similar.
 
 Repeat this section for each Keepstore server you are setting up.
 
@@ -77,7 +83,7 @@ Make sure to update the @service_host@ value to match each of your Keepstore ser
 ~$ <span class="userinput">echo "Site prefix is '$prefix'"</span>
 ~$ <span class="userinput">read -rd $'\000' keepservice &lt;&lt;EOF; arv keep_service create --keep-service "$keepservice"</span>
 <span class="userinput">{
- "service_host":"keep0.$prefix.your.domain",
+ "service_host":"<strong>keep0.$prefix.your.domain</strong>",
  "service_port":25107,
  "service_ssl_flag":false,
  "service_type":"disk"
diff --git a/doc/install/install-manual-prerequisites-ruby.html.textile.liquid b/doc/install/install-manual-prerequisites-ruby.html.textile.liquid
deleted file mode 100644 (file)
index 3bad424..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
----
-layout: default
-navsection: installguide
-title: Install Ruby and bundler
-...
-
-We recommend Ruby >= 2.1.
-
-h2(#rvm). Option 1: Install with rvm
-
-<notextile>
-<pre><code>~$ <span class="userinput">\curl -sSL https://get.rvm.io | bash -s stable --ruby=2.1</span>
-~$ <span class="userinput">gem install bundler
-</span></code></pre></notextile>
-
-h2(#fromsource). Option 2: Install from source
-
-<notextile>
-<pre><code><span class="userinput">mkdir -p ~/src
-cd ~/src
-wget http://cache.ruby-lang.org/pub/ruby/2.1/ruby-2.1.5.tar.gz
-tar xzf ruby-2.1.5.tar.gz
-cd ruby-2.1.5
-./configure
-make
-sudo make install
-
-sudo gem install bundler</span>
-</code></pre></notextile>
index 5fe214ec96d0439a1595dca4e0baa47bdb4ffb00..52a51a191aafb90188d1906583b419f0135ca49b 100644 (file)
@@ -11,37 +11,104 @@ This guide assumes you have seven systems available in the same network subnet:
 <div class="offset1">
 table(table table-bordered table-condensed).
 |_Function_|_Number of nodes_|
-|Arvados REST API, Websockets, Workbench and Crunch dispatcher|1|
-|Arvados SSO server|1|
+|Arvados API, Crunch dispatcher, Git, Websockets and Workbench|1|
+|Arvados Compute node|1|
 |Arvados Keepproxy server|1|
 |Arvados Keepstore servers|2|
-|Arvados shell server|1|
-|Arvados compute node|1|
+|Arvados Shell server|1|
+|Arvados SSO server|1|
 </div>
 
 The number of Keepstore, shell and compute nodes listed above is a minimum. In a real production installation, you will likely run many more of each of those types of nodes. In such a scenario, you would probably also want to dedicate a node to the Workbench server and Crunch dispatcher, respectively. For performance reasons, you may want to run the database server on a separate node as well.
 
+h2(#repos). Arvados package repositories
+
+On any host where you install Arvados software, you'll need to set up an Arvados package repository.  Repositories are available for several popular distributions.
+
+h3. CentOS
+
+Packages are available for CentOS 6.  First, register the Curoverse signing key in RPM's database:
+
+{% include 'install_redhat_key' %}
+
+Then save this configuration block in @/etc/yum.repos.d/arvados.repo@:
+
+<notextile>
+<pre><code>[arvados]
+name=Arvados
+baseurl=http://rpm.arvados.org/CentOS/$releasever/os/$basearch/
+</code></pre>
+</notextile>
+
+h3. Debian and Ubuntu
+
+Packages are available for Debian 7 ("wheezy"), Ubuntu 12.04 ("precise"), and Ubuntu 14.04 ("trusty").
+
+First, register the Curoverse signing key in apt's database:
+
+{% include 'install_debian_key' %}
+
+Configure apt to retrieve packages from the Arvados package repository. This command depends on your OS vendor and version:
+
+table(table table-bordered table-condensed).
+|OS version|Command|
+|Debian 7 ("wheezy")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ wheezy main" &#x7c; sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
+|Ubuntu 12.04 ("precise")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ precise main" &#x7c; sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
+|Ubuntu 14.04 ("trusty")|<notextile><code><span class="userinput">echo "deb http://apt.arvados.org/ trusty main" &#x7c; sudo tee /etc/apt/sources.list.d/arvados.list</span></code></notextile>|
+
+{% include 'notebox_begin' %}
+
+Arvados packages for Ubuntu may depend on third-party packages in Ubuntu's "universe" repository.  If you're installing on Ubuntu, make sure you have the universe sources uncommented in @/etc/apt/sources.list@.
+
+{% include 'notebox_end' %}
+
+Retrieve the package list:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get update</span>
+</code></pre>
+</notextile>
+
 h2. A unique identifier
 
-Each Arvados installation should have a globally unique identifier, which is a unique 5-character alphanumeric string. Here is a snippet of ruby that generates such a string based on the hostname of your computer:
+Each Arvados installation should have a globally unique identifier: a 5-character lowercase alphanumeric string. For testing purposes, here is one way to make a random 5-character string:
 
-<pre>
-Digest::MD5.hexdigest(`hostname`).to_i(16).to_s(36)[0..4]
-</pre>
+<notextile>
+<pre><code>~$ <span class="userinput">tr -dc 0-9a-z &lt;/dev/urandom | head -c5; echo</span>
+</code></pre>
+</notextile>
 
-You may also use a different method to pick the unique identifier. The unique identifier will be part of the hostname of the services in your Arvados cluster. The rest of this documentation will refer to it as your @uuid_prefix@. 
+You may also use a different method to pick the unique identifier. The unique identifier will be part of the hostname of the services in your Arvados cluster. The rest of this documentation will refer to it as your @uuid_prefix@.
 
 
 h2. SSL certificates
 
-There are five public-facing services that will require an SSL certificate. If you do not have official SSL certificates, you can use self-signed certificates. By convention, we use the following hostname pattern:
+There are six public-facing services that require an SSL certificate. If you do not have official SSL certificates, you can use self-signed certificates.
+
+{% include 'notebox_begin' %}
+
+Most Arvados clients and services will accept self-signed certificates when the @ARVADOS_API_HOST_INSECURE@ environment variable is set to @true@.  However, web browsers generally do not make it easy for users to accept self-signed certificates from Web sites.
+
+Users who log in through Workbench will visit three sites: the SSO server, the API server, and Workbench itself.  When a browser visits each of these sites, it will warn the user if the site uses a self-signed certificate, and the user must accept it before continuing.  This procedure usually only needs to be done once in a browser.
+
+Beyond that, Workbench includes JavaScript clients for other Arvados services.  Users are usually not warned if these client connections are refused because the server uses a self-signed certificate, and it is especially difficult to accept those certificates:
+
+* JavaScript connects to the Websockets server to provide incremental page updates and view logs from running jobs.
+* JavaScript connects to the API and Keepproxy servers to upload local files to collections.
+
+In sum, Workbench will be much less pleasant to use in a cluster that uses self-signed certificates.  Avoid self-signed certificates unless you plan to deploy a cluster without Workbench, you are deploying only to evaluate Arvados as an individual system administrator, or you can push configuration to users' browsers to trust your self-signed certificates.
+
+{% include 'notebox_end' %}
+
+By convention, we use the following hostname pattern:
 
 <div class="offset1">
 table(table table-bordered table-condensed).
 |_Function_|_Hostname_|
-|Arvados REST API|@uuid_prefix@.your.domain|
-|Arvados Websockets endpoint|ws.@uuid_prefix@.your.domain|
+|Arvados API|@uuid_prefix@.your.domain|
+|Arvados Git server|git.@uuid_prefix@.your.domain|
 |Arvados Keepproxy server|keep.@uuid_prefix@.your.domain|
-|Arvados Workbench|workbench.@uuid_prefix@.your.domain|
 |Arvados SSO Server|auth.your.domain|
+|Arvados Websockets endpoint|ws.@uuid_prefix@.your.domain|
+|Arvados Workbench|workbench.@uuid_prefix@.your.domain|
 </div>
index 25ddf7b05761b56dbcba12a906b68fef4da2f05d..08ac57f5ba474a131381fb2d34460f051eda8a2e 100644 (file)
@@ -4,14 +4,152 @@ navsection: installguide
 title: Install a shell server
 ...
 
-This installation guide assumes you are on a 64 bit Debian or Ubuntu system.
-
-There is nothing inherently special about an Arvados shell server. It is just a GNU/Linux machine with the Arvados SDKs installed. For optimal performance, the Arvados shell server should be on the same LAN as the Arvados cluster, but that is not required.
+There is nothing inherently special about an Arvados shell server. It is just a GNU/Linux machine with Arvados utilities and SDKs installed. For optimal performance, the Arvados shell server should be on the same LAN as the Arvados cluster, but that is not required.
 
 h2. Install API tokens
 
-Please follow the "API token guide":{{site.baseurl}}/user/reference/api-tokens.html to get API tokens for your user and install them on your shell server. We will use those tokens to test the SDKs as we install them.
+Please follow the "API token guide":../user/reference/api-tokens.html to get API tokens for your Arvados account and install them on your shell server. We will use those tokens to test the SDKs as we install them.
+
+h2. Install the Ruby SDK and utilities
+
+If you're using RVM:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo /usr/local/rvm/bin/rvm-exec default gem install arvados-cli</span>
+</code></pre>
+</notextile>
+
+If you're not using RVM:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo gem install arvados-cli</span>
+</code></pre>
+</notextile>
+
+h2. Install the Python SDK and utilities
+
+On Debian-based systems:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install python-arvados-python-client python-arvados-fuse</span>
+</code></pre>
+</notextile>
+
+On Red Hat-based systems:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo yum install python27-python-arvados-python-client python27-python-arvados-fuse</span>
+</code></pre>
+</notextile>
+
+{% include 'note_python27_sc' %}
+
+h2. Install Git and curl
+
+{% include 'install_git_curl' %}
+
+h2. Update Git Config
+
+Configure git to use the ARVADOS_API_TOKEN environment variable to authenticate to arv-git-httpd. We use the @--system@ flag so it takes effect for all current and future user accounts. It does not affect git's behavior when connecting to other git servers.
+
+<notextile>
+<pre>
+<code>~$ <span class="userinput">sudo git config --system 'credential.https://git.<b>uuid_prefix.your.domain</b>/.username' none</span></code>
+<code>~$ <span class="userinput">sudo git config --system 'credential.https://git.<b>uuid_prefix.your.domain</b>/.helper' '!cred(){ cat >/dev/null; if [ "$1" = get ]; then echo password=$ARVADOS_API_TOKEN; fi; };cred'</span></code>
+</pre>
+</notextile>
+
+h2. Install arvados-login-sync
+
+This program makes it possible for Arvados users to log in to the shell server -- subject to permissions assigned by the Arvados administrator -- using the SSH keys they upload to Workbench. It sets up login accounts, updates group membership, and adds users' public keys to the appropriate @authorized_keys@ files.
+
+Create an Arvados virtual_machine object representing this shell server. This will assign a UUID.
+
+<notextile>
+<pre>
+<code>apiserver:~$ <span class="userinput">arv --format=uuid virtual_machine create --virtual-machine '{"hostname":"<b>your.shell.server.hostname</b>"}'</span>
+zzzzz-2x53u-zzzzzzzzzzzzzzz</code>
+</pre>
+</notextile>
+
+Create a token that is allowed to read login information for this VM.
+
+<notextile>
+<pre>
+<code>apiserver:~$ <span class="userinput">arv api_client_authorization create --api-client-authorization '{"scopes":["GET /arvados/v1/virtual_machines/<b>zzzzz-2x53u-zzzzzzzzzzzzzzz</b>/logins"]}'
+{
+ ...
+ "api_token":"zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz",
+ ...
+}</code>
+</pre>
+</notextile>
+
+Note the UUID and the API token output by the above commands: you will need them in a minute.
+
+Install the arvados-login-sync program.
+
+If you're using RVM:
+
+<notextile>
+<pre>
+<code>shellserver:~$ <span class="userinput">sudo -i `which rvm-exec` default gem install arvados-login-sync</span></code>
+</pre>
+</notextile>
+
+If you're not using RVM:
+
+<notextile>
+<pre>
+<code>shellserver:~$ <span class="userinput">sudo -i gem install arvados-login-sync</span></code>
+</pre>
+</notextile>
+
+Install cron.
+
+On Debian-based systems:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo apt-get install cron</span>
+</code></pre>
+</notextile>
+
+On Red Hat-based systems:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo yum install cron</span>
+</code></pre>
+</notextile>
+
+Configure cron to run the @arvados-login-sync@ program every 2 minutes.
+
+If you're using RVM:
+
+<notextile>
+<pre>
+<code>shellserver:~$ <span class="userinput">sudo bash -c 'umask 077; tee /etc/cron.d/arvados-login-sync' &lt;&lt;'EOF'
+ARVADOS_API_HOST="<strong>uuid_prefix.your.domain</strong>"
+ARVADOS_API_TOKEN="<strong>the_token_you_created_above</strong>"
+ARVADOS_VIRTUAL_MACHINE_UUID="<strong>zzzzz-2x53u-zzzzzzzzzzzzzzz</strong>"
+*/2 * * * * root /usr/local/rvm/bin/rvm-exec default arvados-login-sync
+EOF</span></code>
+</pre>
+</notextile>
+
+If you're not using RVM:
 
-h2. Install the SDKs
+<notextile>
+<pre>
+<code>shellserver:~$ <span class="userinput">sudo bash -c 'umask 077; tee /etc/cron.d/arvados-login-sync' &lt;&lt;'EOF'
+ARVADOS_API_HOST="<strong>uuid_prefix.your.domain</strong>"
+ARVADOS_API_TOKEN="<strong>the_token_you_created_above</strong>"
+ARVADOS_VIRTUAL_MACHINE_UUID="<strong>zzzzz-2x53u-zzzzzzzzzzzzzzz</strong>"
+*/2 * * * * root arvados-login-sync
+EOF</span></code>
+</pre>
+</notextile>
 
-Install the "Python SDK":{{site.baseurl}}/sdk/python/sdk-python.html and the "Command line SDK":{{site.baseurl}}/sdk/cli/install.html
+A user should be able to log in to the shell server when the following conditions are satisfied:
+* The user has uploaded an SSH public key: Workbench &rarr; Account menu &rarr; "SSH keys" item &rarr; "Add new SSH key" button.
+* As an admin user, you have given the user permission to log in: Workbench &rarr; Admin menu &rarr; "Users" item &rarr; "Show" button &rarr; "Admin" tab &rarr; "Setup shell account" button.
+* Two minutes have elapsed since the above conditions were satisfied, and the cron job has had a chance to run.
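+
+To verify, try connecting with the SSH key the user uploaded (a sketch; the account name is the Unix username shown for the user in Workbench):
+
+<notextile>
+<pre><code>~$ <span class="userinput">ssh <b>unix_username</b>@<b>your.shell.server.hostname</b></span>
+</code></pre>
+</notextile>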
index 4f6a9771f1d08bfed9fccc1cee30e9121efeca61..af999e95a922b14bbda53339b5d7eb29c20bae6b 100644 (file)
@@ -6,52 +6,107 @@ title: Install Single Sign On (SSO) server
 
 h2(#dependencies). Install dependencies
 
-Make sure you have "Ruby and Bundler":install-manual-prerequisites-ruby.html installed.
+h3(#install_git_curl). Install git and curl
+
+{% include 'install_git_curl' %}
+
+h3(#install_ruby_and_bundler). Install Ruby and Bundler
+
+{% include 'install_ruby_and_bundler' %}
+
+h3(#install_postgres). Install PostgreSQL
+
+{% include 'install_postgres' %}
 
 h2(#install). Install SSO server
 
-h3. Get SSO server code and create database
+h3. Get SSO server code and run bundle
 
 <notextile>
 <pre><code>~$ <span class="userinput">cd $HOME</span> # (or wherever you want to install)
 ~$ <span class="userinput">git clone https://github.com/curoverse/sso-devise-omniauth-provider.git</span>
 ~$ <span class="userinput">cd sso-devise-omniauth-provider</span>
-~/sso-devise-omniauth-provider$ <span class="userinput">bundle install</span>
-~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake db:create</span>
-~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake db:migrate</span>
-</code></pre>
-</notextile>
+~/sso-devise-omniauth-provider$ <span class="userinput">bundle install --without=development</span>
+</code></pre></notextile>
+
+h2. Configure the SSO server
+
+First, copy the example configuration file:
+
+<notextile>
+<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">cp -i config/application.yml.example config/application.yml</span>
+</code></pre></notextile>
+
+The SSO server reads the @config/application.yml@ file, as well as the @config/application.default.yml@ file. Values in @config/application.yml@ take precedence over the defaults that are defined in @config/application.default.yml@. The @config/application.yml.example@ file is not read by the SSO server and is provided for installation convenience only.
+
+Consult @config/application.default.yml@ for a full list of configuration options.  Local configuration goes in @config/application.yml@; do not edit @config/application.default.yml@.
+
+h3(#uuid_prefix). uuid_prefix
+
+Generate a uuid prefix for the single sign on service.  This prefix is used to identify user records as originating from this site.  It must be exactly 5 lowercase ASCII letters and/or digits.  You may use the following snippet to generate a uuid prefix:
+
+<notextile>
+<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">ruby -e 'puts "#{rand(2**64).to_s(36)[0,5]}"'</span>
+abcde
+</code></pre></notextile>
 
-h3. Configure Rails secret
+Edit @config/application.yml@ and set @uuid_prefix@ in the "common" section.
 
-Create a secret:
+h3(#secret_token). secret_token
+
+Generate a new secret token for signing cookies:
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">cp -i config/initializers/secret_token.rb.example config/initializers/secret_token.rb</span>
-~/sso-devise-omniauth-provider$ <span class="userinput">rake secret</span>
+<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)'</span>
 zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz
-</code></pre>
-</notextile>
+</code></pre></notextile>
 
-Edit @config/initializers/secret_token.rb@ to set @config.secret_token@ to the string produced by @rake secret@.
+Edit @config/application.yml@ and set @secret_token@ in the "common" section.
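+
+After setting both values, the "common" section of @config/application.yml@ might look like this (using the example values generated above):
+
+<notextile>
+<pre><code>common:
+  uuid_prefix: <span class="userinput">abcde</span>
+  secret_token: <span class="userinput">zzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzzz</span>
+</code></pre>
+</notextile>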
 
-h3. Configure upstream authentication provider
+h2(#database). Set up the database
 
-This will enable users to log in using their existing Google accounts.  If you don't want to use Google for account services, you can also "add accounts manually.":#manual-accounts
+Generate a new database password. Nobody ever needs to memorize it or type it, so make a strong one:
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">cp -i config/environments/production.rb.example config/environments/production.rb</span>
-</code></pre>
-</notextile>
+<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">ruby -e 'puts rand(2**128).to_s(36)'</span>
+abcdefghijklmnopqrstuvwxyz012345689
+</code></pre></notextile>
 
-Edit @config/environments/production.rb@ to set @config.google_oauth2_client_id@ and @config.google_oauth2_client_secret@.  See "Omniauth Google OAuth2 gem documentation":https://github.com/zquestz/omniauth-google-oauth2 and "Using OAuth 2.0 to Access Google APIs":https://developers.google.com/accounts/docs/OAuth2 for information about using the "Google Developers Console":https://console.developers.google.com to get a Google client id and client secret.
+Create a new database user with permission to create its own databases.
 
-h3(#client). Create arvados-server client
+<notextile>
+<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">sudo -u postgres createuser --createdb --encrypted -R -S --pwprompt arvados_sso</span>
+Enter password for new role: <span class="userinput">paste-database-password-you-generated</span>
+Enter it again: <span class="userinput">paste-database-password-you-generated</span>
+</code></pre></notextile>
+
+Configure the SSO server to connect to your database by creating and updating @config/database.yml@. Replace the @xxxxxxxx@ database password placeholders with the new password you generated above.  If you are planning a production system, update the @production@ section; otherwise use @development@.
+
+<notextile>
+<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">cp -i config/database.yml.example config/database.yml</span>
+~/sso-devise-omniauth-provider$ <span class="userinput">edit config/database.yml</span>
+</code></pre></notextile>
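+
+A filled-in @production@ section of @config/database.yml@ might look like this (a sketch; adjust the host and database name to match your setup):
+
+<notextile>
+<pre><code>production:
+  adapter: postgresql
+  encoding: utf8
+  database: arvados_sso_production
+  username: arvados_sso
+  password: <span class="userinput">paste-database-password-you-generated</span>
+  host: localhost
+</code></pre>
+</notextile>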
 
-Use @rails console@ to create a @Client@ record that will be used by the Arvados API server.  The values of @app_id@ and @app_secret@ correspond to the @APP_ID@ and @APP_SECRET@ that must be set in in "Setting up Omniauth in the API server.":install-api-server.html#omniauth
+Create and initialize the database. If you are planning a production system, choose the @production@ rails environment, otherwise use @development@.
 
 <notextile>
-<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">rake secret</span>
+<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake db:setup</span>
+</code></pre></notextile>
+
+Alternatively, if the database user you intend to use for the SSO server is not allowed to create new databases, you can create the database first and then populate it with rake. Be sure to adjust the database name if you are using the @development@ environment. This sequence of commands is functionally equivalent to the @rake db:setup@ command above:
+
+<notextile>
+<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">sudo -u postgres createdb arvados_sso_production -E UTF8 -O arvados_sso -T template0</span>
+~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake db:schema:load</span>
+~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake db:seed</span>
+</code></pre></notextile>
+
+h2(#client). Create arvados-server client
+
+Use @rails console@ to create a @Client@ record that will be used by the Arvados API server.  The values of @app_id@ and @app_secret@ correspond to the values for @sso_app_id@ and @sso_app_secret@ in the "API server's SSO settings.":install-api-server.html#omniauth
+
+<notextile>
+<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)'</span>
 xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 ~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rails console</span>
 :001 &gt; <span class="userinput">c = Client.new</span>
@@ -63,9 +118,36 @@ xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
 </code></pre>
 </notextile>
 
-h2(#manual-accounts). Adding user accounts manually
+h2(#assets). Precompile assets
 
-Instead of relying on an upstream authentication such as Google, you can create accounts on the SSO server manually.
+If you are running in the production environment, you must precompile the assets:
+
+<notextile>
+<pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rake assets:precompile</span>
+</code></pre>
+</notextile>
+
+h2(#authentication_methods). Authentication methods
+
+Authentication methods are configured in @application.yml@.  Currently three authentication methods are supported: local accounts, LDAP, and Google+.  If neither Google+ nor LDAP is enabled, the SSO server defaults to local user accounts.  Only one authentication mechanism should be in use at a time.
+
+h3(#local_accounts). Local account authentication
+
+There are two configuration options for local accounts:
+
+<pre>
+  # If true, allow new creation of new accounts in the SSO server's internal
+  # user database.
+  allow_account_registration: false
+
+  # If true, send an email confirmation before activating new accounts in the
+  # SSO server's internal user database (otherwise users are activated immediately.)
+  require_email_confirmation: false
+</pre>
+
+For more information about configuring backend support for sending email (required to send email confirmations) see "Configuring Action Mailer":http://guides.rubyonrails.org/configuring.html#configuring-action-mailer
+
+If @allow_account_registration@ is false, you may manually create local accounts on the SSO server from the rails console:
 
 <notextile>
 <pre><code>~/sso-devise-omniauth-provider$ <span class="userinput">RAILS_ENV=production bundle exec rails console</span>
@@ -76,26 +158,128 @@ Instead of relying on an upstream authentication such as Google, you can create
 </code></pre>
 </notextile>
 
-To log in using a manually created account:
+h3(#ldap). LDAP authentication
+
+The following options are available to configure LDAP authentication.  Note that you must preserve the indentation of the fields listed under @use_ldap@.
+
+<pre>
+  use_ldap:
+    title: Example LDAP
+    host: ldap.example.com
+    port: 636
+    method: ssl
+    base: "ou=Users, dc=example, dc=com"
+    uid: uid
+    email_domain: example.com
+    #bind_dn: "some_user"
+    #password: "some_password"
+</pre>
 
-# Go to https://auth.your.domain/users/sign_in
-# Enter the email address and password and click on "Sign in"
-# You will arrive at a page "You are now signed in as test@example.com"
-# Go to https://workbench.@uuid_prefix@.your.domain/
-# Click on the Workbench "Log in" button.
-# You should now be logged in to Workbench.  Confirm by looking for the email address displayed in the upper right.
+table(table).
+|_. Option|_. Description|
+|title |Title displayed to the user on the login page|
+|host  |LDAP server hostname|
+|port  |LDAP server port|
+|method|One of "plain", "ssl", "tls"|
+|base  |Directory lookup base|
+|uid   |User id field used for directory lookup|
+|email_domain|Strip off specified email domain from login and perform lookup on bare username|
+|bind_dn|If required by the server, the username to log in with before performing the directory lookup|
+|password|If required by the server, the password to log in with before performing the directory lookup|
 
-h2. Start the SSO server
+h3(#google). Google+ authentication
+
+In order to use Google+ authentication, you must use the <a href="https://console.developers.google.com" target="_blank">Google Developers Console</a> to create a set of client credentials.
+
+# Go to the <a href="https://console.developers.google.com" target="_blank">Google Developers Console</a> and select or create a project; this will take you to the project page.
+# On the sidebar, click on *APIs & auth* then select *APIs*.
+## Search for *Contacts API* and click on *Enable API*.
+## Search for *Google+ API* and click on *Enable API*.
+# On the sidebar, click on *Credentials*; under *OAuth* click on *Create new Client ID* to bring up the *Create Client ID* dialog box.
+# Under *Application type* select *Web application*.
+# If the authorization origins are not displayed, clicking on *Create Client ID* will take you to *Consent screen* settings.
+## On consent screen settings, enter the appropriate details and click on *Save*.
+## This will return you to the *Create Client ID* dialog box.
+# You must set the authorization origins.  Edit @sso.your-site.com@ to the appropriate hostname that you will use to access the SSO service:
+## JavaScript origin should be @https://sso.your-site.com/@
+## Redirect URI should be @https://sso.your-site.com/users/auth/google_oauth2/callback@
+# Copy the values of *Client ID* and *Client secret* from the Google Developers Console into the Google section of @config/application.yml@, like this:
+
+<notextile>
+<pre><code>  # Google API tokens required for OAuth2 login.
+  google_oauth2_client_id: <span class="userinput">"---YOUR---CLIENT---ID---HERE---"</span>
+  google_oauth2_client_secret: <span class="userinput">"---YOUR---CLIENT---SECRET---HERE---"</span></code></pre></notextile>
 
-h3. Run a simple standalone server
+h2(#start). Set up a Web server
 
-You can use the Webrick server that is bundled with Ruby to quickly verify that your installation is functioning:
+For best performance, we recommend you use Nginx as your Web server front-end, with a Passenger backend to serve the SSO server.  To do that:
 
 <notextile>
-<pre><code>~/arvados/services/api$ <span class="userinput">RAILS_ENV=production bundle exec rails server</span>
+<ol>
+<li><a href="https://www.phusionpassenger.com/documentation/Users%20guide%20Nginx.html">Install Nginx and Phusion Passenger</a>.</li>
+
+<li><p>Edit the http section of your Nginx configuration to run the Passenger server, and act as a front-end for it.  You might add a block like the following, adding SSL and logging parameters to taste:</p>
+
+<pre><code>server {
+  listen 127.0.0.1:8900;
+  server_name localhost-sso;
+
+  root   <span class="userinput">/YOUR/PATH/TO/sso-devise-omniauth-provider/public</span>;
+  index  index.html index.htm index.php;
+
+  passenger_enabled on;
+  # If you're using RVM, uncomment the line below.
+  #passenger_ruby /usr/local/rvm/wrappers/default/ruby;
+}
+
+upstream sso {
+  server     127.0.0.1:8900  fail_timeout=10s;
+}
+
+proxy_http_version 1.1;
+
+server {
+  listen       <span class="userinput">[your public IP address]</span>:443 ssl;
+  server_name  auth.<span class="userinput">your.domain</span>;
+
+  ssl on;
+  ssl_certificate     <span class="userinput">/YOUR/PATH/TO/cert.pem</span>;
+  ssl_certificate_key <span class="userinput">/YOUR/PATH/TO/cert.key</span>;
+
+  index  index.html index.htm index.php;
+
+  location / {
+    proxy_pass            http://sso;
+    proxy_redirect        off;
+    proxy_connect_timeout 90s;
+    proxy_read_timeout    300s;
+
+    proxy_set_header      X-Forwarded-Proto https;
+    proxy_set_header      Host $http_host;
+    proxy_set_header      X-Real-IP $remote_addr;
+    proxy_set_header      X-Forwarded-For $proxy_add_x_forwarded_for;
+  }
+}
 </code></pre>
+</li>
+
+<li>Restart Nginx.</li>
+
+</ol>
 </notextile>
 
-h3. Production environment
+{% include 'notebox_begin' %}
+
+If you see the following warning "you may safely ignore it":https://stackoverflow.com/questions/10374871/no-secret-option-provided-to-racksessioncookie-warning:
+
+<pre>
+SECURITY WARNING: No secret option provided to Rack::Session::Cookie.
+This poses a security threat. It is strongly recommended that you
+provide a secret to prevent exploits that may be possible from crafted
+cookies. This will not be supported in future versions of Rack, and
+future versions will even invalidate your existing user cookies.
+
+Called from: /var/lib/gems/2.1.0/gems/actionpack-3.2.8/lib/action_dispatch/middleware/session/abstract_store.rb:28:in `initialize'.
+</pre>
 
-As a Ruby on Rails application, the SSO server should be compatible with any Ruby application server that supports Rack applications.  We recommend "Passenger":https://www.phusionpassenger.com/ to run the SSO server in production.
+{% include 'notebox_end' %}
index 43e6f418737d5d715601d6bdf70789e3f089f723..52a69f502b1d6e65ad920230ab91599f86be0b19 100644 (file)
@@ -4,86 +4,57 @@ navsection: installguide
 title: Install Workbench
 ...
 
-This installation guide assumes you are on a 64 bit Debian or Ubuntu system.
-
 h2. Install prerequisites
 
-<notextile>
-<pre><code>~$ <span class="userinput">sudo apt-get install \
-    bison build-essential gettext libcurl3 libcurl3-gnutls \
-    libcurl4-openssl-dev libpcre3-dev libpq-dev libreadline-dev \
-    libssl-dev libxslt1.1 git wget zlib1g-dev graphviz libsqlite3-dev
-</span></code></pre></notextile>
-
-Also make sure you have "Ruby and bundler":install-manual-prerequisites-ruby.html installed.
+The Arvados package repository includes a Workbench server package that can help automate much of the deployment.
 
-Workbench doesn't need its own database, so it does not need to have PostgreSQL installed.
+h3(#install_ruby_and_bundler). Install Ruby and Bundler
 
-h2. Download the source tree
-
-<notextile>
-<pre><code>~$ <span class="userinput">cd $HOME</span> # (or wherever you want to install)
-~$ <span class="userinput">git clone https://github.com/curoverse/arvados.git</span>
-</code></pre></notextile>
+{% include 'install_ruby_and_bundler' %}
 
-See also: "Downloading the source code":https://arvados.org/projects/arvados/wiki/Download on the Arvados wiki.
+h2(#install_workbench). Install Workbench and dependencies
 
-The Workbench application is in @apps/workbench@ in the source tree.
+Workbench doesn't need its own database, so it does not need to have PostgreSQL installed.
 
-h2. Install gem dependencies
+On a Debian-based system, install the following packages:
 
 <notextile>
-<pre><code>~$ <span class="userinput">cd arvados/apps/workbench</span>
-~/arvados/apps/workbench$ <span class="userinput">bundle install</span>
+<pre><code>~$ <span class="userinput">sudo apt-get install bison build-essential graphviz git python-arvados-python-client arvados-workbench</span>
 </code></pre>
 </notextile>
 
-Alternatively, if you don't have sudo/root privileges on the host, install the gems in your own directory instead of installing them system-wide:
+On a Red Hat-based system, install the following packages:
 
 <notextile>
-<pre><code>~$ <span class="userinput">cd arvados/apps/workbench</span>
-~/arvados/apps/workbench$ <span class="userinput">bundle install --path=vendor/bundle</span>
-</code></pre></notextile>
-
-The @bundle install@ command might produce a warning about the themes_for_rails gem. This is OK:
-
-<notextile>
-<pre><code>themes_for_rails at /home/<b>you</b>/.rvm/gems/ruby-2.1.1/bundler/gems/themes_for_rails-1fd2d7897d75 did not have a valid gemspec.
-This prevents bundler from installing bins or native extensions, but that may not affect its functionality.
-The validation message from Rubygems was:
-  duplicate dependency on rails (= 3.0.11, development), (>= 3.0.0) use:
-    add_runtime_dependency 'rails', '= 3.0.11', '>= 3.0.0'
-Using themes_for_rails (0.5.1) from https://github.com/holtkampw/themes_for_rails (at 1fd2d78)
-</code></pre></notextile>
+<pre><code>~$ <span class="userinput">sudo yum install bison make automake gcc gcc-c++ graphviz git python27-python-arvados-python-client arvados-workbench</span>
+</code></pre>
+</notextile>
 
-h2. Choose your environment
+{% include 'note_python27_sc' %}
 
-The Workbench application can be run in @development@ or in @production@ mode. Unless this installation is going to be used for development on the Workbench applicatoin itself, you should run it in @production@ mode.
+h2. Set up configuration files
 
-Copy the example environment file for your environment. For example, if you choose @production@:
+The Workbench server package uses configuration files that you write to @/etc/arvados/workbench@ and ensures they're consistently deployed.  Create this directory and copy the example configuration files to it:
 
 <notextile>
-<pre><code>~/arvados/apps/workbench$ <span class="userinput">cp -i config/environments/production.rb.example config/environments/production.rb</span>
-</code></pre></notextile>
+<pre><code>~$ <span class="userinput">sudo mkdir -p /etc/arvados/workbench</span>
+~$ <span class="userinput">sudo chmod 700 /etc/arvados/workbench</span>
+~$ <span class="userinput">sudo cp /var/www/arvados-workbench/current/config/application.yml.example /etc/arvados/workbench/application.yml</span>
+</code></pre>
+</notextile>
 
-h2. Configure the Workbench application
+h2. Configure Workbench
 
-First, copy the example configuration file:
+Edit @/etc/arvados/workbench/application.yml@ following the instructions below.  The deployment script will consistently deploy this to Workbench's configuration directory.  Workbench reads both @application.yml@ and its own @config/application.default.yml@ file.  Values in @application.yml@ take precedence over the defaults that are defined in @config/application.default.yml@.  The @config/application.yml.example@ file is not read by Workbench and is provided for installation convenience only.
 
-<notextile>
-<pre><code>~/arvados/apps/workbench$ <span class="userinput">cp -i config/application.yml.example config/application.yml</span>
-</code></pre></notextile>
-
-The Workbench application reads the @config/application.yml@ file, as well as the @config/application.defaults.yml@ file. Values in @config/application.yml@ take precedence over the defaults that are defined in @config/application.defaults.yml@. The @config/application.yml.example@ file is not read by the Workbench application and is provided for installation convenience, only.
-
-Consult @config/application.default.yml@ for a full list of configuration options. Always put your local configuration in @config/application.yml@, never edit @config/application.default.yml@.
+Consult @config/application.default.yml@ for a full list of configuration options.  Always put your local configuration in @/etc/arvados/workbench/application.yml@&mdash;never edit @config/application.default.yml@.
 
 h3. secret_token
 
 This application needs a secret token. Generate a new secret:
 
 <notextile>
-<pre><code>~/arvados/apps/workbench$ <span class="userinput">rake secret</span>
+<pre><code>~$ <span class="userinput">ruby -e 'puts rand(2**400).to_s(36)'</span>
 aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa
 </code></pre>
 </notextile>
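+
+Then set this value in @/etc/arvados/workbench/application.yml@.  As a sketch (assuming the usual Rails-style layout of that file, with a placeholder value):
+
+<notextile>
+<pre><code>production:
+  secret_token: <span class="userinput">aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa</span>
+</code></pre>
+</notextile>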
@@ -108,27 +79,99 @@ h3. arvados_insecure_https
 
 If the SSL certificate you use for your API server isn't an official certificate signed by a CA, make sure @arvados_insecure_https@ is @true@.
 
-h3. other options
+h3. Other options
 
 Consult @application.default.yml@ for a full list of configuration options. Always put your local configuration in @application.yml@ instead of editing @application.default.yml@.
 
-Copy @config/piwik.yml.example@ to @config/piwik.yml@ and edit to suit.
+h2. Configure Piwik
 
-h2. Start the Workbench application
+In @/var/www/arvados-workbench/current/config@, copy @piwik.yml.example@ to @piwik.yml@ and edit to suit.
 
-h3. Development environment
+h2. Set up Web server
 
-If you plan to run in development mode, you can now run the development server this way:
+For best performance, we recommend you use Nginx as your Web server front-end, with a Passenger backend to serve Workbench.  To do that:
 
 <notextile>
-<pre><code>~/arvados/apps/workbench$ <span class="userinput">bundle exec rails server --port=3031</span>
-</code></pre></notextile>
+<ol>
+<li><a href="https://www.phusionpassenger.com/documentation/Users%20guide%20Nginx.html">Install Nginx and Phusion Passenger</a>.</li>
 
-h3. Production environment
+<li>If you're deploying on CentOS and using the python27 Software Collection, configure Nginx to use it:
 
-We recommend "Passenger":https://www.phusionpassenger.com/ to run the API server in production.
+<pre><code>~$ <span class="userinput">sudo usermod --shell /bin/bash nginx</span>
+~$ <span class="userinput">sudo -u nginx sh -c 'echo "[[ -z \$PS1 && -e /opt/rh/python27/enable ]] && source /opt/rh/python27/enable" >>~/.bash_profile'</span>
+</code></pre>
+
+</li>
+
+<li><p>Edit the http section of your Nginx configuration to run the Passenger server, and act as a front-end for it.  You might add a block like the following, adding SSL and logging parameters to taste:</p>
+
+<pre><code>server {
+  listen 127.0.0.1:9000;
+  server_name localhost-workbench;
+
+  root /var/www/arvados-workbench/current/public;
+  index  index.html index.htm index.php;
+
+  passenger_enabled on;
+  # If you're using RVM, uncomment the line below.
+  #passenger_ruby /usr/local/rvm/wrappers/default/ruby;
+}
+
+upstream workbench {
+  server     127.0.0.1:9000  fail_timeout=10s;
+}
+
+proxy_http_version 1.1;
+
+server {
+  listen       <span class="userinput">[your public IP address]</span>:443 ssl;
+  server_name  workbench.<span class="userinput">uuid-prefix.your.domain</span>;
+
+  ssl on;
+  ssl_certificate     <span class="userinput">/YOUR/PATH/TO/cert.pem</span>;
+  ssl_certificate_key <span class="userinput">/YOUR/PATH/TO/cert.key</span>;
+
+  index  index.html index.htm index.php;
+  client_max_body_size 128m;
+
+  location / {
+    proxy_pass            http://workbench;
+    proxy_redirect        off;
+    proxy_connect_timeout 90s;
+    proxy_read_timeout    300s;
+
+    proxy_set_header      X-Forwarded-Proto https;
+    proxy_set_header      Host $http_host;
+    proxy_set_header      X-Real-IP $remote_addr;
+    proxy_set_header      X-Forwarded-For $proxy_add_x_forwarded_for;
+  }
+}
+</code></pre>
+</li>
+
+<li>Restart Nginx.</li>
+
+</ol>
+</notextile>
+
+h2. Prepare the Workbench deployment
+
+Now that all your configuration is in place, run @/usr/local/bin/arvados-workbench-upgrade.sh@.  This script checks and deploys your configuration and installs the necessary gems.
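+
+For example (a sketch; assuming the script needs root privileges):
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo /usr/local/bin/arvados-workbench-upgrade.sh</span>
+</code></pre>
+</notextile>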
+
+{% include 'notebox_begin' %}
+You can safely ignore the following error message you may see when installing gems:
+<notextile>
+<pre><code>themes_for_rails at /usr/local/rvm/gems/ruby-2.1.1/bundler/gems/themes_for_rails-1fd2d7897d75 did not have a valid gemspec.
+This prevents bundler from installing bins or native extensions, but that may not affect its functionality.
+The validation message from Rubygems was:
+  duplicate dependency on rails (= 3.0.11, development), (>= 3.0.0) use:
+    add_runtime_dependency 'rails', '= 3.0.11', '>= 3.0.0'
+Using themes_for_rails (0.5.1) from https://github.com/holtkampw/themes_for_rails (at 1fd2d78)
+</code></pre>
+</notextile>
+{% include 'notebox_end' %}
 
-Point it to the apps/workbench directory in the source tree.
+This command aborts when it encounters an error.  It's safe to rerun multiple times, so if there's a problem with your configuration, you can fix that and try again.
 
 h2. Trusted client setting
 
@@ -136,7 +179,7 @@ Log in to Workbench once to ensure that the Arvados API server has a record of t
 
 In the <strong>API server</strong> project root, start the rails console.  Locate the ApiClient record for your Workbench installation (typically, while you're setting this up, the @last@ one in the database is the one you want), then set the @is_trusted@ flag for the appropriate client record:
 
-<notextile><pre><code>~/arvados/services/api$ <span class="userinput">RAILS_ENV=production bundle exec rails console</span>
+<notextile><pre><code>/var/www/arvados-api/current$ <span class="userinput">RAILS_ENV=production bundle exec rails console</span>
 irb(main):001:0&gt; <span class="userinput">wb = ApiClient.all.last; [wb.url_prefix, wb.created_at]</span>
 =&gt; ["https://workbench.example.com/", Sat, 19 Apr 2014 03:35:12 UTC +00:00]
 irb(main):002:0&gt; <span class="userinput">include CurrentApiClient</span>
@@ -151,11 +194,10 @@ h2(#admin-user). Add an admin user
 Next, we're going to use the rails console on the <strong>API server</strong> to activate our own account and give yourself admin privileges:
 
 <notextile>
-<pre><code>~/arvados/services/api$ <span class="userinput">RAILS_ENV=production bundle exec rails console</span>
+<pre><code>/var/www/arvados-api/current$ <span class="userinput">RAILS_ENV=production bundle exec rails console</span>
 irb(main):001:0&gt; <span class="userinput">Thread.current[:user] = User.all.select(&:identity_url).last</span>
-irb(main):002:0&gt; <span class="userinput">Thread.current[:user].is_admin = true</span>
-irb(main):003:0&gt; <span class="userinput">Thread.current[:user].update_attributes is_admin: true, is_active: true</span>
-irb(main):004:0&gt; <span class="userinput">User.where(is_admin: true).collect &:email</span>
+irb(main):002:0&gt; <span class="userinput">Thread.current[:user].update_attributes is_admin: true, is_active: true</span>
+irb(main):003:0&gt; <span class="userinput">User.where(is_admin: true).collect &:email</span>
 =&gt; ["root", "<b>your_address@example.com</b>"]
 </code></pre></notextile>
 
index fb87195e3e860027adb6b325beedf07916bae990..9e668cd5b411bfcb63b319c13c9b65eaa0409c9f 100644 (file)
@@ -1,17 +1,26 @@
 ---
 layout: default
 navsection: installguide
-title: Install pre-built docker images
+title: Install pre-built Docker images
 ...
 
+This method is intended for evaluation and development on a local workstation. It is not suitable for production use in a cluster deployment.
+
 {% include 'notebox_begin' %}
-This method is easy, but is not yet suitable for production use. It is still in development: some features do not work yet.
-* Websockets service is not enabled. This means Workbench auto-refresh and web upload (and some other features) do not work.
-* The node manager is not enabled. Two worker containers are brought up at startup.
-* The automatic network configuration allows you to log in to Workbench from a browser _running on the same host as docker_. Connecting from other hosts will require additional configuration (not covered here).
+* The automatic network configuration allows you to log in to Workbench from a browser _running on the same host as Docker_. Connecting from other hosts requires additional configuration (not covered here).
+* Your data will be stored inside the Docker containers.  You may stop and restart the containers without loss, but if you delete the container, your data will be gone.
+* Updating the Arvados software inside the Docker containers is not supported.  You may download updated Docker images, but migrating data to updated containers is not yet supported.
 {% include 'notebox_end' %}
 
-First, make sure that @curl@ and @docker@ are installed on your system, and that you are in the docker group (see "Installing Docker":https://docs.docker.com/installation/).
+h2. Prerequisites
+
+# A GNU/Linux x64 (virtual) machine
+# A working Docker installation (see "Installing Docker":https://docs.docker.com/installation/)
+# curl
+
+h2. Verify prerequisites
+
+Make sure that @curl@ and @docker@ are installed on your system, and that you are in the docker group (see "Installing Docker":https://docs.docker.com/installation/).
 
 <notextile><pre><code>~$ <span class="userinput">which curl</span>
 /usr/bin/curl
@@ -21,12 +30,41 @@ Docker version 1.2.0-dev, build dc243c8
 yourusername sudo fuse docker
 </code></pre></notextile>
 
-Download and install Arvados.
+h2. Download and install Arvados
 
 <notextile>
 <pre><code>~$ <span class="userinput">\curl -sSL get.arvados.org | bash</span>
 </code></pre></notextile>
 
-This command will download the latest copy of the Arvados docker images. It also gets the arvdock command and saves it in the current working directory. It then uses arvdock to spin up Arvados. Depending on the speed of your internet connection, it can take a while to download the Arvados docker images.
+This command will download the latest build of the Arvados Docker images. It also gets the @arvdock@ command and saves it in the current working directory. It then uses @arvdock@ to spin up Arvados. Note that the Arvados Docker images are large and may take a while to download.
+
+If you prefer, you can also download and inspect the installation script before running it. @get.arvados.org@ redirects to "https://raw.githubusercontent.com/curoverse/arvados-dev/master/install/easy-docker-install.sh":https://raw.githubusercontent.com/curoverse/arvados-dev/master/install/easy-docker-install.sh, which is the installation script.
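+
+For example, a sketch of downloading and inspecting the script before running it:
+
+<notextile>
+<pre><code>~$ <span class="userinput">curl -sSL get.arvados.org > easy-docker-install.sh</span>
+~$ <span class="userinput">less easy-docker-install.sh</span>
+~$ <span class="userinput">bash easy-docker-install.sh</span>
+</code></pre></notextile>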
+
+The @arvdock@ command usage is listed here:
+
+<pre>
+usage: ./arvdock (start|stop|restart|reset|test) [options]
+
+start    run new or restart stopped arvados containers
+stop     stop arvados containers
+restart  stop and then start arvados containers
+reset    stop and delete containers WARNING: this will delete the data inside Arvados!
+test     run tests
+
+./arvdock start/stop/restart options:
+  -d[port], --doc[=port]        Documentation server (default port 9898)
+  -w[port], --workbench[=port]  Workbench server (default port 9899)
+  -s[port], --sso[=port]        SSO server (default port 9901)
+  -a[port], --api[=port]        API server (default port 9900)
+  -c, --compute                 Compute nodes (starts 2)
+  -v, --vm                      Shell server
+  -n, --nameserver              Nameserver
+  -k, --keep                    Keep servers
+  -p, --keepproxy               Keepproxy server
+  -h, --help                    Display this help and exit
+
+  If no options are given, the action is applied to all servers.
 
-If you prefer, you can also download the installation script and inspect it before running it. @get.arvados.org@ redirects to "https://raw.githubusercontent.com/curoverse/arvados-dev/master/install/easy-docker-install.sh":https://raw.githubusercontent.com/curoverse/arvados-dev/master/install/easy-docker-install.sh, which is the installation script.
+./arvdock test [testname] [testname] ...
+  By default, all tests are run.
+</pre>
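+
+For example, since the action applies to all servers when no options are given, the following would start everything:
+
+<notextile>
+<pre><code>~$ <span class="userinput">./arvdock start</span>
+</code></pre></notextile>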
index df5507702443103be54c9db56122404c9de9b05b..9db56b9bbd1bdad70711e0505a0c6fdb85634fd7 100644 (file)
@@ -8,9 +8,9 @@ title: "Installation"
 
 To use the @arv@ command, you can either install the @arvados-cli@ gem via RubyGems or build and install the package from source.
 
-h4. Prerequisites: Ruby &gt;= 2.1.0 and curl libraries
+h3. Prerequisites: Ruby, Bundler, and curl libraries
 
-Make sure you have "Ruby and bundler":{{site.baseurl}}/install/install-manual-prerequisites-ruby.html installed.
+{% include 'install_ruby_and_bundler' %}
 
 Install curl libraries with your system's package manager. For example, on Debian or Ubuntu:
 
@@ -20,7 +20,7 @@ $ <code class="userinput">sudo apt-get install libcurl3 libcurl3-gnutls libcurl4
 </pre>
 </notextile>
 
-h4. Option 1: install with RubyGems
+h3. Option 1: Install with RubyGems
 
 <notextile>
 <pre>
@@ -28,7 +28,7 @@ $ <code class="userinput">sudo gem install arvados-cli</code>
 </pre>
 </notextile>
 
-h4. Option 2: build and install from source
+h3. Option 2: Build and install from source
 
 <notextile>
 <pre>
index c7655ba78e9a22b7068d02fba1ea9565df5f9d13..3184c0929c2add50e6140a0f7d94696418ad32c2 100644 (file)
@@ -387,7 +387,7 @@ optional arguments:
                         exit
   --local               Run locally using arv-run-pipeline-instance
   --docker-image DOCKER_IMAGE
-                        Docker image to use, default arvados/jobs
+                        Docker image to use, otherwise use instance default.
   --ignore-rcode        Commands that return non-zero return codes should not
                         be considered failed.
   --no-reuse            Do not reuse past jobs.
index 448cbb1ede54814a6db5285a9ffc66b92e4e2cb8..e28d02011b8d559e7954bc7c5326a812c42b9adb 100644 (file)
@@ -15,24 +15,46 @@ It should be treated as alpha/experimental. Currently, limitations include:
 
 h3. Installation
 
+h4. Option 1: Install from distribution packages
+
+First, "add the appropriate package repository for your distribution":{{ site.baseurl }}/install/install-manual-prerequisites.html#repos.
+
+On Debian-based systems:
+
 <notextile>
-<pre>
-$ <code class="userinput">sudo apt-get install libjson-perl libio-socket-ssl-perl libwww-perl libipc-system-simple-perl</code>
-$ <code class="userinput">git clone https://github.com/curoverse/arvados.git</code>
-$ <code class="userinput">cd arvados/sdk/perl</code>
-$ <code class="userinput">perl Makefile.PL</code>
-$ <code class="userinput">sudo make install</code>
-</pre>
+<pre><code>~$ <span class="userinput">sudo apt-get install libjson-perl libio-socket-ssl-perl libwww-perl libipc-system-simple-perl libarvados-perl</span>
+</code></pre>
+</notextile>
+
+On Red Hat-based systems:
+
+<notextile>
+<pre><code>~$ <span class="userinput">sudo yum install perl-ExtUtils-MakeMaker perl-JSON perl-IO-Socket-SSL perl-Crypt-SSLeay perl-WWW-Curl libarvados-perl</span>
+</code></pre>
+</notextile>
+
+h4. Option 2: Install from source
+
+First, install dependencies from your distribution.  Refer to the package lists above, but don't install @libarvados-perl@.
+
+Then run the following:
+
+<notextile>
+<pre><code>~$ <span class="userinput">git clone https://github.com/curoverse/arvados.git</span>
+~$ <span class="userinput">cd arvados/sdk/perl</span>
+~$ <span class="userinput">perl Makefile.PL</span>
+~$ <span class="userinput">sudo make install</span>
+</code></pre>
 </notextile>
 
-h4. Test installation
+h3. Test installation
 
 If the SDK is installed, @perl -MArvados -e ''@ should produce no errors.
 
 If your @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ environment variables are set up correctly (see "api-tokens":{{site.baseurl}}/user/reference/api-tokens.html for details), the following test script should work:
 
 <notextile>
-<pre>$ <code class="userinput">perl &lt;&lt;'EOF'
+<pre>~$ <code class="userinput">perl &lt;&lt;'EOF'
 use Arvados;
 my $arv = Arvados-&gt;new('apiVersion' => 'v1');
 my $me = $arv-&gt;{'users'}-&gt;{'current'}-&gt;execute;
index ead804e8d0eee5216edf951584cb4d45dee2943d..75769d7e2a9f1c0c09b17994992be34ab396f713 100644 (file)
@@ -14,66 +14,58 @@ h3. Installation
 
 If you are logged in to an Arvados VM, the Python SDK should be installed.
 
-To use the Python SDK elsewhere, you can either install the Python SDK via PyPI or build and install the package using the arvados source tree.
+To use the Python SDK elsewhere, you can install from a distribution package, PyPI, or source.
 
 {% include 'notebox_begin' %}
-The Python SDK requires Python 2.7
+The Python SDK requires Python 2.7.
 {% include 'notebox_end' %}
 
-h4. Option 1: install with PyPI
+h4. Option 1: Install from distribution packages
+
+First, "add the appropriate package repository for your distribution":{{ site.baseurl }}/install/install-manual-prerequisites.html#repos.
+
+On Debian-based systems:
 
 <notextile>
-<pre>
-$ <code class="userinput">sudo apt-get install python-pip python-dev libattr1-dev libfuse-dev pkg-config python-yaml</code>
-$ <code class="userinput">sudo pip install arvados-python-client</code>
-</pre>
+<pre><code>~$ <span class="userinput">sudo apt-get install python-arvados-python-client</span>
+</code></pre>
 </notextile>
 
-_If your version of @pip@ is 1.4 or newer, the @pip install@ command might give an error: "Could not find a version that satisfies the requirement arvados-python-client". If this happens, fix it by adding a @--pre@ flag:_
+On Red Hat-based systems:
 
 <notextile>
-<pre>
-$ <code class="userinput">sudo pip install --pre arvados-python-client</code>
-</pre>
+<pre><code>~$ <span class="userinput">sudo yum install python27-python-arvados-python-client</span>
+</code></pre>
 </notextile>
 
-h4. Option 2: install from distribution packages (Debian/Ubuntu only)
+{% include 'note_python27_sc' %}
 
-First add @http://apt.arvados.org@ to your list of apt repositories:
+h4. Option 2: Install with pip
 
-<notextile>
-<pre>
-$ <code class="userinput">echo "deb http://apt.arvados.org/ wheezy main" | sudo tee /etc/apt/sources.list.d/apt.arvados.org.list</code>
-</pre>
-</notextile>
+Run @pip-2.7 install arvados-python-client@ in an appropriate installation environment, such as a virtualenv.
 
-Then install the package:
+If your version of @pip@ is 1.4 or newer, the @pip install@ command might give an error: "Could not find a version that satisfies the requirement arvados-python-client". If this happens, try @pip-2.7 install --pre arvados-python-client@.
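+
+As a sketch, assuming @virtualenv@ is available (the directory name here is illustrative):
+
+<notextile>
+<pre><code>~$ <span class="userinput">virtualenv --python=python2.7 ~/arvados-sdk-env</span>
+~$ <span class="userinput">~/arvados-sdk-env/bin/pip install arvados-python-client</span>
+</code></pre>
+</notextile>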
 
-<notextile>
-<pre>
-$ <code class="userinput">sudo apt-get update</code>
-$ <code class="userinput">sudo apt-get install python-arvados-python-client</code>
-</pre>
-</notextile>
+h4. Option 3: Install from source
 
-h4. Option 3: build and install from source
+Install the @python-setuptools@ package from your distribution.  Then run the following:
 
 <notextile>
-<pre>
-~$ <code class="userinput">sudo apt-get install python-dev libattr1-dev libfuse-dev pkg-config</code>
-~$ <code class="userinput">git clone https://github.com/curoverse/arvados.git</code>
-~$ <code class="userinput">cd arvados/sdk/python</code>
-~/arvados/sdk/python$ <code class="userinput">sudo python setup.py install</code>
-</pre>
+<pre><code>~$ <span class="userinput">git clone https://github.com/curoverse/arvados.git</span>
+~$ <span class="userinput">cd arvados/sdk/python</span>
+~$ <span class="userinput">python2.7 setup.py install</span>
+</code></pre>
 </notextile>
 
+You may optionally run the final installation command in a virtualenv, or with the @--user@ option.
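+
+For example, a per-user install without root privileges (a sketch):
+
+<notextile>
+<pre><code>~$ <span class="userinput">python2.7 setup.py install --user</span>
+</code></pre>
+</notextile>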
+
 h4. Test installation
 
 If the SDK is installed and your @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ environment variables are set up correctly (see "api-tokens":{{site.baseurl}}/user/reference/api-tokens.html for details), @import arvados@ should produce no errors:
 
 <notextile>
-<pre>$ <code class="userinput">python</code>
-Python 2.7.4 (default, Sep 26 2013, 03:20:26) 
+<pre>~$ <code class="userinput">python2.7</code>
+Python 2.7.4 (default, Sep 26 2013, 03:20:26)
 [GCC 4.7.3] on linux2
 Type "help", "copyright", "credits" or "license" for more information.
 >>> <code class="userinput">import arvados</code>
@@ -160,4 +152,3 @@ j = arv.jobs().list().execute()
 </notextile>
 
 The SDK retrieves the list of API methods from the server at run time. Therefore, the set of available methods is determined by the server version rather than the SDK version.
-
diff --git a/doc/start/getting_started/publicproject.html.textile.liquid b/doc/start/getting_started/publicproject.html.textile.liquid
new file mode 100644 (file)
index 0000000..a00192d
--- /dev/null
@@ -0,0 +1,128 @@
+---
+layout: default
+navsection: start
+title: Visit an Arvados Public Project
+...
+
+h2. <a href="https://workbench.qr1hi.arvadosapi.com/projects/qr1hi-j7d0g-662ij1pcw6bj8uj">Mason Lab - Pathomap / Ancestry Mapper (Public)</a>
+
+You can see Arvados in action by accessing the <a href="https://workbench.qr1hi.arvadosapi.com/projects/qr1hi-j7d0g-662ij1pcw6bj8uj">Mason Lab - Pathomap / Ancestry Mapper (Public) project</a>. By visiting this project, you can see what an Arvados project is, access data collections in this project, and click through a pipeline instance's contents.
+
+You will be accessing this project in read-only mode and will not be able to make any modifications such as running a new pipeline instance.
+
+<div id="carousel-publicproject" class="carousel slide" data-interval="false">
+  <!-- Indicators -->
+  <ol class="carousel-indicators">
+    <li data-target="#carousel-publicproject" data-slide-to="0" class="active"></li>
+    <li data-target="#carousel-publicproject" data-slide-to="1"></li>
+    <li data-target="#carousel-publicproject" data-slide-to="2"></li>
+    <li data-target="#carousel-publicproject" data-slide-to="3"></li>
+    <li data-target="#carousel-publicproject" data-slide-to="4"></li>
+    <li data-target="#carousel-publicproject" data-slide-to="5"></li>
+    <li data-target="#carousel-publicproject" data-slide-to="6"></li>
+    <li data-target="#carousel-publicproject" data-slide-to="7"></li>
+    <li data-target="#carousel-publicproject" data-slide-to="8"></li>
+    <li data-target="#carousel-publicproject" data-slide-to="9"></li>
+    <li data-target="#carousel-publicproject" data-slide-to="10"></li>
+    <li data-target="#carousel-publicproject" data-slide-to="11"></li>
+  </ol>
+
+  <!-- Wrapper for slides -->
+  <div class="carousel-inner" role="listbox">
+    <div class="item active">
+      <img src="{{ site.baseurl }}/images/publicproject/description.png" alt="Step 1. The project's first tab, *Description*, describes what this project is all about.">
+      <div class="carousel-caption">
+        Step 1. The project's first tab, *Description*, describes what this project is all about.
+      </div>
+    </div>
+
+    <div class="item">
+      <img src="{{ site.baseurl }}/images/publicproject/collections.png" alt="The *Data collections* tab contains the various pipeline inputs, logs, and outputs.">
+      <div class="carousel-caption">
+        The *Data collections* tab contains the various pipeline inputs, logs, and outputs.
+      </div>
+    </div>
+
+    <div class="item">
+      <img src="{{ site.baseurl }}/images/publicproject/instances.png" alt="You can see the jobs and pipelines in this project by accessing the *Jobs and pipelines* tab.">
+      <div class="carousel-caption">
+        You can see the jobs and pipelines in this project by accessing the *Jobs and pipelines* tab.
+      </div>
+    </div>
+
+    <div class="item">
+      <img src="{{ site.baseurl }}/images/publicproject/collection-show.png" alt="In the *Data collections* tab, click on the *Show* icon to the left of a collection to see the collection contents.">
+      <div class="carousel-caption">
+        In the *Data collections* tab, click on the *Show* icon to the left of a collection to see the collection contents.
+      </div>
+    </div>
+
+    <div class="item">
+      <img src="{{ site.baseurl }}/images/publicproject/collection-files.png" alt="The collection page lists the details about it. The *Files* tab can be used to view and download individual files in it.">
+      <div class="carousel-caption">
+        The collection page lists the details about it. The *Files* tab can be used to view and download individual files in it.
+      </div>
+    </div>
+
+    <div class="item">
+      <img src="{{ site.baseurl }}/images/publicproject/collection-graph.png" alt="The collection *Provenance graph* tab gives a visual representation of this collection's provenance.">
+      <div class="carousel-caption">
+        The collection *Provenance graph* tab gives a visual representation of this collection's provenance.
+      </div>
+    </div>
+
+    <div class="item">
+      <img src="{{ site.baseurl }}/images/publicproject/instance-show.png" alt="In the project *Jobs and pipelines* tab, click on the *Show* icon to the left of a pipeline to access the pipeline contents.">
+      <div class="carousel-caption">
+        In the project *Jobs and pipelines* tab, click on the *Show* icon to the left of a pipeline to access the pipeline contents.
+      </div>
+    </div>
+
+    <div class="item">
+      <img src="{{ site.baseurl }}/images/publicproject/instance-components.png" alt="The pipeline *Components* tab details the various jobs in it and how long it took to run it.">
+      <div class="carousel-caption">
+        The pipeline *Components* tab details the various jobs in it and how long it took to run it.
+      </div>
+    </div>
+
+    <div class="item">
+      <img src="{{ site.baseurl }}/images/publicproject/instance-job.png" alt="Click on the down arrow in one of the job rows to see the job details. You can also click on the job's output.">
+      <div class="carousel-caption">
+        Click on the down arrow <i class="fa fa-lg fa-fw fa-caret-down"></i> in one of the job rows to see the job details. You can also click on the job's output.
+      </div>
+    </div>
+
+    <div class="item">
+      <img src="{{ site.baseurl }}/images/publicproject/instance-log.png" alt="The *Log* tab can be used to see the log for the pipeline instance.">
+      <div class="carousel-caption">
+        The *Log* tab can be used to see the log for the pipeline instance.
+      </div>
+    </div>
+
+    <div class="item">
+      <img src="{{ site.baseurl }}/images/publicproject/instance-graph.png" alt="The *Graph* tab provides a visual representation of the pipeline run.">
+      <div class="carousel-caption">
+        The *Graph* tab provides a visual representation of the pipeline run.
+      </div>
+    </div>
+
+    <div class="item">
+      <img src="{{ site.baseurl }}/images/publicproject/instance-advanced.png" alt="The *Advanced* tab can be used to access metadata about the pipeline. [END]">
+      <div class="carousel-caption">
+        The *Advanced* tab can be used to access metadata about the pipeline. [END]
+      </div>
+    </div>
+  </div>
+
+  <!-- Controls -->
+  <a class="left carousel-control" href="#carousel-publicproject" role="button" data-slide="prev">
+    <span class="glyphicon glyphicon-chevron-left" aria-hidden="true"></span>
+    <span class="sr-only">Previous</span>
+  </a>
+  <a class="right carousel-control" href="#carousel-publicproject" role="button" data-slide="next">
+    <span class="glyphicon glyphicon-chevron-right" aria-hidden="true"></span>
+    <span class="sr-only">Next</span>
+  </a>
+</div>
+
+Tip: You may need to make your browser window bigger to see full-size images in the gallery above.
index 46156b7f85e2b5e102b42cb5d4ef158aab18c79d..4a5105bdd8bbb0e9c538e1281b586501b46fb742 100644 (file)
@@ -4,7 +4,7 @@ navsection: userguide
 title: "Checking your environment"
 ...
 
-First, log into an Arvados VM instance (instructions for "Unix":{{site.baseurl}}/user/getting_started/ssh-access-unix.html#login or "Windows":{{site.baseurl}}/user/getting_started/ssh-access-windows.html#login) or install the Arvados "Command line SDK":{{site.baseurl}}/sdk/cli/install.html and "Python SDK":{{site.baseurl}}/sdk/python/sdk-python.html on your workstation.
+First, log into an Arvados VM instance (instructions for "Webshell":{{site.baseurl}}/user/getting_started/vm-login-with-webshell.html, "Unix":{{site.baseurl}}/user/getting_started/ssh-access-unix.html#login, or "Windows":{{site.baseurl}}/user/getting_started/ssh-access-windows.html#login) or install the Arvados "Command line SDK":{{site.baseurl}}/sdk/cli/install.html and "Python SDK":{{site.baseurl}}/sdk/python/sdk-python.html on your workstation.
 
 Check that you are able to access the Arvados API server using @arv user current@.  If it is able to access the API server, it will print out information about your account:
 
index 8b6e22d1fd3effb7091c0244d5a1572a61a5be4d..7ef903c3de9c5dddd84a39b0cd5906feaa47d522 100644 (file)
@@ -6,7 +6,7 @@ title: Arvados Community and Getting Help
 
 h2. On the web
 
-The Arvados Free Sofware project page is located at "http://arvados.org":http://arvados.org .  The "Arvados Wiki":https://arvados.org/projects/arvados/wiki is a collaborative site for documenting Arvados has an overview of the Arvados Platform and Components.  The "Arvados blog":https://arvados.org/projects/arvados/blogs posts articles of interest about Arvados.
+The Arvados Free Software project page is located at "http://arvados.org":http://arvados.org.  The "Arvados Wiki":https://arvados.org/projects/arvados/wiki is a collaborative site for documenting Arvados and provides an overview of the Arvados Platform and Components.  The "Arvados blog":https://arvados.org/projects/arvados/blogs posts articles of interest about Arvados.
 
 h2. Mailing lists
 
@@ -14,7 +14,7 @@ The "Arvados user mailing list":http://lists.arvados.org/mailman/listinfo/arvado
 
 h2. IRC
 
-The "#arvados":irc://irc.oftc.net:6667/#arvados IRC (Internet Relay Chat) channel at on the "Open and Free Technology Community (irc.oftc.net)":http://www.oftc.net/oftc/ is available for live discussion and support.  You can use a traditional IRC client or "join OFTC over the web.":https://webchat.oftc.net/?channels=arvados
+The "#arvados":irc://irc.oftc.net:6667/#arvados IRC (Internet Relay Chat) channel at the "Open and Free Technology Community (irc.oftc.net)":http://www.oftc.net/oftc/ is available for live discussion and support.  You can use a traditional IRC client or "join OFTC over the web.":https://webchat.oftc.net/?channels=arvados
 
 h2. Bug tracking
 
index 83513b8a26a999707f83057ed27af34d19be2832..a9eb8c135943fd58a9f7b9d91627af19059386a0 100644 (file)
@@ -4,7 +4,7 @@ navsection: userguide
 title: Accessing an Arvados VM with SSH - Unix Environments
 ...
 
-This document is for Unix environments (Linux, OS X, Cygwin). If you are using a Windows environment, please visit the "Accessing an Arvados VM with SSH - Windows Environments":ssh-access-windows.html page.
+This document is for accessing an Arvados VM using SSH keys in Unix environments (Linux, OS X, Cygwin). If you would like to access your VM through your browser, please visit the "Accessing an Arvados VM with Webshell":vm-login-with-webshell.html page. If you are using a Windows environment, please visit the "Accessing an Arvados VM with SSH - Windows Environments":ssh-access-windows.html page.
 
 {% include 'ssh_intro' %}
 
index 7a9ab27d39f46025ecdc6a69e5075ae1317989df..c3a06405493d9c340b010547e88057aacb6039bf 100644 (file)
@@ -4,7 +4,7 @@ navsection: userguide
 title: Accessing an Arvados VM with SSH - Windows Environments
 ...
 
-This document is for Windows environments. If you are using a Unix environment (Linux, OS X, Cygwin), please visit the "Accessing an Arvados VM with SSH - Unix Environments":ssh-access-unix.html page.
+This document is for accessing an Arvados VM using SSH keys in Windows environments. If you would like to access your VM through your browser, please visit the "Accessing an Arvados VM with Webshell":vm-login-with-webshell.html page. If you are using a Unix environment (Linux, OS X, Cygwin), please visit the "Accessing an Arvados VM with SSH - Unix Environments":ssh-access-unix.html page.
 
 {% include 'ssh_intro' %}
 
diff --git a/doc/user/getting_started/vm-login-with-webshell.html.textile.liquid b/doc/user/getting_started/vm-login-with-webshell.html.textile.liquid
new file mode 100644 (file)
index 0000000..50fa474
--- /dev/null
@@ -0,0 +1,19 @@
+---
+layout: default
+navsection: userguide
+title: Accessing an Arvados VM with Webshell
+...
+
+This document describes how to access an Arvados VM with Webshell from Workbench.
+
+h2(#webshell). Access VM using webshell
+
+Webshell gives you access to an Arvados virtual machine from your browser with no additional setup.
+
+In the Arvados Workbench, click on the dropdown menu icon <span class="fa fa-lg fa-user"></span> <span class="caret"></span> in the upper right corner of the top navigation menu to access the user settings menu, and click on the menu item *Virtual machines* to see the list of virtual machines you can access.
+
+Each row in the Virtual Machines panel lists the hostname of the VM, along with a <code>Log in as *you*</code> button under the column "Web shell beta". Clicking on this button will open up a webshell terminal for you in a new browser tab and log you in.
+
+!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/vm-access-with-webshell.png!
+
+You are now ready to work in your Arvados VM.
index 54ab71bb7127ee8be580995df917ae580cdec25f..6e334ba0dd7583ddae43b48149a270d3d92ad458 100644 (file)
@@ -6,7 +6,7 @@ title: Accessing Arvados Workbench
 
 If you are using the default Arvados instance for this guide, you can Access Arvados Workbench using this link:
 
-<a href="https://{{ site.arvados_workbench_host }}/" target="_blank">https://{{ site.arvados_workbench_host }}/</a>
+<a href="{{site.arvados_workbench_host}}/" target="_blank">{{site.arvados_workbench_host}}/</a>
 
 (If you are using a different Arvados instance than the default for this guide, replace *{{ site.arvados_workbench_host }}* with your private instance in all of the examples in this guide.)
 
index b108ff707708a35d3c95c97c1e153c306b753055..0967cbc308b3054ed7b7cd32f0903d7b1590f353 100644 (file)
@@ -4,7 +4,7 @@ navsection: userguide
 title: Welcome to Arvados!
 ...
 
-_If you are new to Arvados, please read the "Getting Started":{{site.baseurl}}/start/index.html guide for a quick introduction to working with Arvados._
+_If you are new to Arvados, please try the Quickstart on <a href="http://doc.arvados.org">the documentation homepage</a> instead of this detailed User Guide._
 
 This guide provides a reference for using Arvados to solve big data bioinformatics problems, including:
 
@@ -13,9 +13,7 @@ This guide provides a reference for using Arvados to solve big data bioinformati
 * Storing and querying metadata about genome sequence files, such as human subjects and their phenotypic traits using the "Arvados Metadata Database.":{{site.baseurl}}/user/topics/tutorial-trait-search.html
 * Accessing, organizing, and sharing data, pipelines and results using the "Arvados Workbench":{{site.baseurl}}/user/getting_started/workbench.html web application.
 
-This User Guide goes into more depth than the "Getting Started guide":{{site.baseurl}}/start/index.html, covers how to develop your own pipelines in addition to using pre-existing pipelines, covers the Arvados commandline tools in addition to the Workbench graphical interface to Arvados, and can be referenced in any order.
-
-The examples in this guide use the Arvados instance located at <a href="https://{{ site.arvados_workbench_host }}/" target="_blank">https://{{ site.arvados_workbench_host }}</a>.  If you are using a different Arvados instance replace @{{ site.arvados_workbench_host }}@ with your private instance in all of the examples in this guide.
+The examples in this guide use the Arvados instance located at <a href="{{site.arvados_workbench_host}}/" target="_blank">{{site.arvados_workbench_host}}</a>.  If you are using a different Arvados instance replace @{{ site.arvados_workbench_host }}@ with your private instance in all of the examples in this guide.
 
 Curoverse maintains a public Arvados instance located at <a href="https://workbench.qr1hi.arvadosapi.com/" target="_blank">https://workbench.qr1hi.arvadosapi.com/</a>.  You must have an account in order to use this service.  If you would like to request an account, please send an email to "arvados@curoverse.com":mailto:arvados@curoverse.com.
 
index 768c7d1c266a2d137bc76ed1a86b0b91b7f3ca10..97ad65f8d638732f5d7e16a97a07e930b28e7581 100644 (file)
@@ -6,15 +6,15 @@ title: "Getting an API token"
 
 The Arvados API token is a secret key that enables the @arv@ command line client to access Arvados with the proper permissions.
 
-Access the Arvados Workbench using this link: "https://{{ site.arvados_workbench_host }}/":https://{{ site.arvados_workbench_host }}/  (Replace @{{ site.arvados_api_host }}@ with the hostname of your local Arvados instance if necessary.)
+Access the Arvados Workbench using this link: "{{site.arvados_workbench_host}}/":{{site.arvados_workbench_host}}/  (Replace the hostname portion with the hostname of your local Arvados instance if necessary.)
 
-Open a shell on the system where you want to use the Arvados client. This may be your local workstation, or an Arvados virtual machine accessed with SSH (instructions for "Unix":{{site.baseurl}}/user/getting_started/ssh-access-unix.html#login or "Windows":{{site.baseurl}}/user/getting_started/ssh-access-windows.html#login).
+Open a shell on the system where you want to use the Arvados client. This may be your local workstation, or an Arvados virtual machine accessed with "Webshell":{{site.baseurl}}/user/getting_started/vm-login-with-webshell.html or SSH (instructions for "Unix":{{site.baseurl}}/user/getting_started/ssh-access-unix.html#login or "Windows":{{site.baseurl}}/user/getting_started/ssh-access-windows.html#login).
 
-Click on the link with your _email address_ in the upper right corner to access your account menu, then click on the menu item *Manage account* to go to the account management page. On the *Manage account* page, you will see the *Current Token* panel, which lists your current token and instructions to set up your environment.
+In the Arvados Workbench, click on the dropdown menu icon <span class="fa fa-lg fa-user"></span> <span class="caret"></span> in the upper right corner of the top navigation menu to access the user settings menu, and click on the menu item *Current token*, which lists your current token and instructions to set up your environment.
 
 h2. Setting environment variables
 
-For your convenience, the *Manage account* page on Workbench provides the *Current Token* panel that includes a command you may copy and paste directly into the shell.  It will look something as the following.
+The *Current token* page, accessed using the dropdown menu icon <span class="fa fa-lg fa-user"></span> <span class="caret"></span> in the upper right corner of the top navigation menu, includes a command you may copy and paste directly into the shell.  It will look something like the following.
 
 bc. HISTIGNORE=$HISTIGNORE:'export ARVADOS_API_TOKEN=*'
 export ARVADOS_API_TOKEN=2jv9346o396exampledonotuseexampledonotuseexes7j1ld
diff --git a/doc/user/topics/arv-copy.html.textile.liquid b/doc/user/topics/arv-copy.html.textile.liquid
new file mode 100644 (file)
index 0000000..07e6048
--- /dev/null
@@ -0,0 +1,80 @@
+---
+layout: default
+navsection: userguide
+title: "Using arv-copy"
+...
+
+
+This tutorial describes how to copy Arvados objects from one cluster to another by using @arv-copy@.
+
+{% include 'tutorial_expectations' %}
+
+h2. arv-copy
+
+@arv-copy@ allows users to copy collections, pipeline templates, and pipeline instances from one cluster to another. By default, @arv-copy@ will recursively go through a template or instance and copy all dependencies associated with the object.
+
+For example, let's copy from our <a href="https://cloud.curoverse.com/">beta cloud instance *qr1hi*</a> to *dst_cluster*. The names *qr1hi* and *dst_cluster* are interchangeable with any cluster names. You can find the cluster name in the prefix of the uuid of the object you want to copy. For example, in *qr1hi*-4zz18-tci4vn4fa95w0zx, the cluster name is qr1hi.
+
+In order for the clusters to communicate with each other, you must create a configuration file for each cluster. In the Arvados Workbench of each cluster, click on the dropdown menu icon <span class="fa fa-lg fa-user"></span> <span class="caret"></span> in the upper right corner of the top navigation menu to access the user settings menu, then click on the menu item *Current token* and note the @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ values. The configuration file names must have the format *uuid_prefix.conf*. In our example, that means two files: one for *qr1hi* and one for *dst_cluster*.
+
+!{display: block;margin-left: 25px;margin-right: auto;}{{ site.baseurl }}/images/api-token-host.png!
+
+On the shell account from which you will run @arv-copy@ (for example, the default shell you may have access to is shell.qr1hi), create the configuration files in @~/.config/arvados/@ and copy the @ARVADOS_API_HOST@ and @ARVADOS_API_TOKEN@ values into them as shown below:
+
+<notextile>
+<pre><code>~$ <span class="userinput">cd ~/.config/arvados</span>
+~$ <span class="userinput">echo "ARVADOS_API_HOST=qr1hi.arvadosapi.com" >> qr1hi.conf</span>
+~$ <span class="userinput">echo "ARVADOS_API_TOKEN=123456789abcdefghijkl" >> qr1hi.conf</span>
+~$ <span class="userinput">echo "ARVADOS_API_HOST=dst_cluster.arvadosapi.com" >> dst_cluster.conf</span>
+~$ <span class="userinput">echo "ARVADOS_API_TOKEN=987654321lkjihgfedcba" >> dst_cluster.conf</span>
+</code></pre>
+</notextile>
+
+Now you're ready to copy between *qr1hi* and *dst_cluster*!
+
+h3. How to copy a collection
+
+First, select the uuid of the collection you want to copy from the source cluster. The uuid can be found in the collection display page in the collection summary area (top left box), or from the URL bar (the part after @collections/...@).
+
+Now copy the collection from *qr1hi* to *dst_cluster*. We will use the uuid @qr1hi-4zz18-tci4vn4fa95w0zx@ as an example. You can find this collection in the <a href="https://cloud.curoverse.com/collections/qr1hi-4zz18-tci4vn4fa95w0zx">lobSTR v.3 project on cloud.curoverse.com</a>.
+<notextile>
+<pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst dst_cluster qr1hi-4zz18-tci4vn4fa95w0zx</span>
+qr1hi-4zz18-tci4vn4fa95w0zx: 6.1M / 6.1M 100.0% 
+arvados.arv-copy[1234] INFO: Success: created copy with uuid dst_cluster-4zz18-8765943210cdbae
+</code></pre>
+</notextile>
+
+The output of @arv-copy@ displays the uuid of the collection generated in the destination cluster. By default, the output is placed in your home project in the destination cluster. If you want to place your collection in a pre-created project, specify that project with the @--project-uuid@ option followed by the project uuid.
+
+For example, this will copy the collection to project dst_cluster-j7d0g-a894213ukjhal12 in the destination cluster.
+
+<notextile> <pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst dst_cluster --project-uuid dst_cluster-j7d0g-a894213ukjhal12 qr1hi-4zz18-tci4vn4fa95w0zx</span> 
+</code></pre>
+</notextile>
+
+h3. How to copy a pipeline template or pipeline instance
+
+{% include 'arv_copy_expectations' %}
+
+We will use the uuid @qr1hi-d1hrv-nao0ohw8y7dpf84@ as an example pipeline instance.
+
+<notextile>
+<pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst dst_cluster --dst-git-repo $USER/tutorial qr1hi-d1hrv-nao0ohw8y7dpf84</span>
+To git@git.dst_cluster.arvadosapi.com:$USER/tutorial.git
+ * [new branch] git_git_qr1hi_arvadosapi_com_arvados_git_ac21f0d45a76294aaca0c0c0fdf06eb72d03368d -> git_git_qr1hi_arvadosapi_com_arvados_git_ac21f0d45a76294aaca0c0c0fdf06eb72d03368d
+arvados.arv-copy[19694] INFO: Success: created copy with uuid dst_cluster-d1hrv-rym2h5ub9m8ofwj
+</code></pre>
+</notextile>
+
+New branches in the destination git repo will be created for each branch used in the pipeline template. For example, if your source branch was named ac21f0d45a76294aaca0c0c0fdf06eb72d03368d, your new branch will be named @git_git_qr1hi_arvadosapi_com_reponame_git_ac21f0d45a76294aaca0c0c0fdf06eb72d03368d@. 
+
+By default, if you copy a pipeline instance recursively, you will find that the template as well as all the dependencies are in your home project.
+
+If you would like to copy the object without dependencies, you can use the @--no-recursive@ option.
+
+For example, we can copy the same object using this option.
+
+<notextile>
+<pre><code>~$ <span class="userinput">arv-copy --src qr1hi --dst dst_cluster --dst-git-repo $USER/tutorial --no-recursive qr1hi-d1hrv-nao0ohw8y7dpf84</span>
+</code></pre>
+</notextile>
index 0a0693f3d28af21ff03bb32c80446ce89480f34c..e6c83affb5d9cb86fab64803027c9f8369c5e673 100644 (file)
@@ -14,7 +14,7 @@ This page will demonstrate how to:
 # Upload that image to Arvados for use by Crunch jobs
 # Share your image with others
 
-{% include 'tutorial_expectations' %}
+{% include 'tutorial_expectations_workstation' %}
 
 You also need ensure that "Docker is installed,":https://docs.docker.com/installation/ the Docker daemon is running, and you have permission to access Docker.  You can test this by running @docker version@.  If you receive a permission denied error, your user account may need to be added to the @docker@ group.  If you have root access, you can add yourself to the @docker@ group using @$ sudo addgroup $USER docker@ then log out and log back in again; otherwise consult your local sysadmin.
 
@@ -25,7 +25,7 @@ The easiest way to begin is to start from the "arvados/jobs" image which already
 Download the latest "arvados/jobs" image from the Docker registry:
 
 <notextile>
-<pre><code>$ <span class="userinput">docker pull arvados/jobs</span>
+<pre><code>$ <span class="userinput">docker pull arvados/jobs:latest</span>
 Pulling repository arvados/jobs
 3132168f2acb: Download complete
 a42b7f2c59b6: Download complete
@@ -197,6 +197,6 @@ h2. Share Docker images
 Docker images are subject to normal Arvados permissions.  If wish to share your Docker image with others (or wish to share a pipeline template that uses your Docker image) you will need to use @arv keep docker@ with the @--project-uuid@ option to upload the image to a shared project.
 
 <notextile>
-<pre><code>$ <span class="userinput">arv keep docker --project-uuid zzzzz-j7d0g-u7zg1qdaowykd8d arvados/jobs-with-r</span>
+<pre><code>$ <span class="userinput">arv keep docker --project-uuid qr1hi-j7d0g-xxxxxxxxxxxxxxx arvados/jobs-with-r</span>
 </code></pre>
 </notextile>
index 862b19c2c9a8d4912818b517d98eb4b7e1b4099c..8d1aca63057f44f22abb5811c2ef73c8478e4129 100644 (file)
@@ -43,6 +43,8 @@ h2. Parallel tasks
 
 <notextile>
 <pre>
+$ <span class="userinput">cd ~/keep/by_id/3229739b505d2b878b62aed09895a55a+142</span>
+$ <span class="userinput">ls *.fastq</span>
 HWI-ST1027_129_D0THKACXX.1_1.fastq  HWI-ST1027_129_D0THKACXX.1_2.fastq
 $ <span class="userinput">arv-run grep -H -n ATTGGAGGAAAGATGAGTGAC -- *.fastq</span>
 Running pipeline qr1hi-d1hrv-mg3bju0u7r6w241
@@ -75,6 +77,8 @@ You may use "run-command":run-command.html parameter substitution in the output
 
 <notextile>
 <pre>
+$ <span class="userinput">cd ~/keep/by_id/3229739b505d2b878b62aed09895a55a+142</span>
+$ <span class="userinput">ls *.fastq</span>
 $ <span class="userinput">arv-run grep -H -n ATTGGAGGAAAGATGAGTGAC \< *.fastq \> '$(task.uuid).txt'</span>
 [...]
  1 stderr run-command: grep -H -n ATTGGAGGAAAGATGAGTGAC < /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq > qr1hi-ot0gb-hmmxf2zubfpmhfk.txt
@@ -99,6 +103,8 @@ Multiple commands may be connected by pipes and execute in the same container:
 
 <notextile>
 <pre>
+$ <span class="userinput">cd ~/keep/by_id/3229739b505d2b878b62aed09895a55a+142</span>
+$ <span class="userinput">ls *.fastq</span>
 $ <span class="userinput">arv-run cat -- *.fastq \| grep -H -n ATTGGAGGAAAGATGAGTGAC \> output.txt</span>
 [...]
  1 stderr run-command: cat /keep/3229739b505d2b878b62aed09895a55a+142/HWI-ST1027_129_D0THKACXX.1_1.fastq | grep -H -n ATTGGAGGAAAGATGAGTGAC > output.txt
@@ -128,7 +134,7 @@ Running pipeline qr1hi-d1hrv-slcnhq5czo764b1
 
 h2. Additional options
 
-* @--docker-image IMG@ : By default, commands run inside a Docker container created from the latest "arvados/jobs" Docker image.  Use this option to specify a different image to use.  Note: the Docker image must be uploaded to Arvados using @arv keep docker@.
+* @--docker-image IMG@ : By default, commands run inside a container created from the @default_docker_image_for_jobs@ setting on the API server.  Use this option to specify a different image to use.  Note: the Docker image must be uploaded to Arvados using @arv keep docker@.
 * @--dry-run@ : Print out the final Arvados pipeline generated by @arv-run@ without submitting it.
 * @--local@ : By default, the pipeline will be submitted to your configured Arvados instance.  Use this option to run the command locally using @arv-run-pipeline-instance --run-jobs-here@.
 * @--ignore-rcode@ : Some commands use non-zero exit codes to indicate nonfatal conditions (e.g. @grep@ returns 1 when no match is found).  Set this to indicate that commands that return non-zero return codes should not be considered failed.
index cf3a3cc950fe2d51adddf56803da25700af83650..44e5fd8f3e6e677a98fbd26d099f3ce3d5bb2700 100644 (file)
@@ -6,11 +6,14 @@ title: "Using arv-web"
 
 @arv-web@ enables you to run a custom web service from the contents of an Arvados collection.
 
+{% include 'tutorial_expectations_workstation' %}
+
 h2. Usage
 
 @arv-web@ enables you to set up a web service based on the most recent collection in a project.  An arv-web application is a reproducible, immutable application bundle where the web app is packaged with both the code to run and the data to serve.  Because Arvados Collections can be updated with minimum duplication, it is efficient to produce a new application bundle when the code or data needs to be updated; retaining old application bundles makes it easy to go back and run older versions of your web app.
 
 <pre>
+$ cd $HOME/arvados/services/arv-web
 usage: arv-web.py [-h] --project-uuid PROJECT_UUID [--port PORT]
                   [--image IMAGE]
 
index f1d42adceb9aa9d69a8d0529f2d1013ca92b7ec5..78839196967260dace26bc7791c72656ab85d0c1 100644 (file)
@@ -6,6 +6,8 @@ title: "run-command reference"
 
 The @run-command@ crunch script enables you run command line programs.
 
+{% include 'tutorial_expectations_workstation' %}
+
 h1. Using run-command
 
 The basic @run-command@ process evaluates its inputs and builds a command line, executes the command, and saves the contents of the output directory back to Keep.  For large datasets, @run-command@ can schedule concurrent tasks to execute the wrapped program over a range of inputs (see @task.foreach@ below.)
@@ -73,6 +75,12 @@ table(table table-bordered table-condensed).
 |$(dir ...)        | Takes a reference to an Arvados collection or directory within an Arvados collection and evaluates to a directory path on the local file system where that directory can be accessed by your command.  The path may include a file name, in which case it will evaluate to the parent directory of the file.  Uses Python's os.path.dirname(), so "/foo/bar" will evaluate to "/foo" but "/foo/bar/" will evaluate to "/foo/bar".  Will raise an error if the directory is not accessible. |
 |$(basename&nbsp;...)   | Strip leading directory and trailing file extension from the path provided.  For example, $(basename /foo/bar.baz.txt) will evaluate to "bar.baz".|
 |$(glob ...)       | Take a Unix shell path pattern (supports @*@ @?@ and @[]@) and search the local filesystem, returning the first match found.  Use together with $(dir ...) to get a local filesystem path for Arvados collections.  For example: $(glob $(dir $(mycollection)/*.bam)) will find the first .bam file in the collection specified by the user parameter "mycollection".  If there is more than one match, which one is returned is undefined.  Will raise an error if no matches are found.|
+|$(task.tmpdir)|Designated temporary directory.  This directory will be discarded when the job completes.|
+|$(task.outdir)|Designated output directory.  The contents of this directory will be saved to Keep when the job completes.  A symlink to a file in the Keep mount will reference existing Keep blocks in your job output collection, with no data copying or duplication.|
+|$(job.srcdir)|Path to the git working directory ($CRUNCH_SRC).|
+|$(node.cores)|Number of CPU cores on the node.|
+|$(job.uuid)|Current job uuid ($JOB_UUID).|
+|$(task.uuid)|Current task uuid ($TASK_UUID).|
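+
+A sketch of a command line using several of these substitutions (the program and parameter names are illustrative):
+
+<pre>
+{
+  "command": ["my_aligner", "--threads", "$(node.cores)", "--tmp", "$(task.tmpdir)", "$(file $(input))"]
+}
+</pre>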
 
 h3. Escape sequences
 
@@ -231,11 +239,24 @@ Provide standard input and standard output redirection.
 
 @task.stdout@ specifies the desired file name in the output directory to save the content of standard output.  When command describes a Unix pipeline, this captures the output of the last command.
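+
+For instance, a minimal sketch (file names are illustrative) that counts the lines of an input file and captures the count:
+
+<pre>
+{
+  "command": ["/bin/sh", "-c", "wc -l < $(file $(input))"],
+  "task.stdout": "line_count.txt"
+}
+</pre>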
 
+h3. task.env
+
+Set environment variables for the command.  Accepts an object mapping environment variables to the desired values.  Parameter substitution is performed on values, but not on the environment variable names themselves.  Example usage:
+
+<pre>
+{
+  "command": ["/bin/sh", "-c", "echo $MY_ENV_VAR"],
+  "task.env": {
+    "MY_ENV_VAR": "Hello world!"
+  }
+}
+</pre>
+
 h3. task.vwd
 
 Background: because Keep collections are read-only, this does not play well with certain tools that expect to be able to write their outputs alongside their inputs (such as tools that generate indexes that are closely associated with the original file.)  The run-command's solution to this is the "virtual working directory".
 
-@task.vwd@ specifies a Keep collection with the starting contents of the directory.  @run-command@ will then populate @task.outdir@ with directories and symlinks to mirror the contents of the @task.vwd@ collection.  Your command will then be able to both access its input files and write its output files in @task.outdir@.  When the command completes, the output collection will merge the output of your command with the contents of the starting collection.  Note that files in the starting collection remain read-only and cannot be altered or deleted.
+@task.vwd@ specifies a Keep collection with the starting contents of the output directory.  @run-command@ will populate @task.outdir@ with directories and symlinks to mirror the contents of the @task.vwd@ collection.  Your command will then be able to both access its input files and write its output files from within @task.outdir@.  When the command completes, run-command will save the contents of the output directory to Keep, including the output of your command as well as symlinks to files in the starting collection.  Note that files from the starting collection remain read-only and cannot be altered, but may be deleted or renamed.
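+
+A minimal sketch (the collection locator is a placeholder):
+
+<pre>
+{
+  "command": ["build_index", "sequence.fa"],
+  "task.vwd": "c1bad4b39ca5a924e481008009d94e32+210"
+}
+</pre>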
 
 h3. task.foreach
 
index 147fbf0d3a5da3df162c10da49eb82bc6afacc99..9f10fe43df97b6b87e454663fd92d911db917fcc 100644 (file)
@@ -7,12 +7,13 @@ title: "Running a pipeline on the command line"
 This tutorial demonstrates how to use the command line to run the same pipeline as described in "running a pipeline using Workbench.":{{site.baseurl}}/user/tutorials/tutorial-pipeline-workbench.html
 
 {% include 'tutorial_expectations' %}
+{% include 'tutorial_cluster_name' %}
 
 When you use the command line, you must use Arvados unique identifiers to refer to objects.  The identifiers in this example correspond to the following Arvados objects:
 
-* <i class="fa fa-fw fa-gear"></i> "Tutorial align using bwa mem (qr1hi-p5p6p-itzkwxblfermlwv)":https://{{ site.arvados_workbench_host }}/pipeline_templates/qr1hi-p5p6p-itzkwxblfermlwv
-* <i class="fa fa-fw fa-archive"></i> "Tutorial chromosome 19 reference (2463fa9efeb75e099685528b3b9071e0+438)":https://{{ site.arvados_workbench_host }}/collections/2463fa9efeb75e099685528b3b9071e0+438
-* <i class="fa fa-fw fa-archive"></i> "Tutorial sample exome (3229739b505d2b878b62aed09895a55a+142)":https://{{ site.arvados_workbench_host }}/collections/3229739b505d2b878b62aed09895a55a+142
+* <i class="fa fa-fw fa-gear"></i> "Tutorial align using bwa mem (qr1hi-p5p6p-itzkwxblfermlwv)":{{site.arvados_workbench_host}}/pipeline_templates/qr1hi-p5p6p-itzkwxblfermlwv
+* <i class="fa fa-fw fa-archive"></i> "Tutorial chromosome 19 reference (2463fa9efeb75e099685528b3b9071e0+438)":{{site.arvados_workbench_host}}/collections/2463fa9efeb75e099685528b3b9071e0+438
+* <i class="fa fa-fw fa-archive"></i> "Tutorial sample exome (3229739b505d2b878b62aed09895a55a+142)":{{site.arvados_workbench_host}}/collections/3229739b505d2b878b62aed09895a55a+142
 
 Use @arv pipeline run@ to run the pipeline, supplying the inputs to the bwa-mem component on the command line:
 
@@ -32,9 +33,10 @@ bwa-mem qr1hi-8i9sb-67n1qvsronmd2z6 49bae1066f4ebce72e2587a3efa61c7d+88
 
 This instantiates your pipeline and displays periodic status reports in your terminal window. The new pipeline instance will also show up on the Workbench Dashboard.
 
 @arv pipeline run@ submits a job for each pipeline component as soon as the component's inputs are known (i.e., any dependencies are satisfied). It terminates when there is no work left to do: this means either all components are satisfied and all jobs have completed successfully, _or_ one or more jobs have failed and it is therefore unproductive to submit any further jobs.
 
-The Keep locators of the output of of the @bwa-mem@ components are available from the last status report shown above:
+The Keep locators of the output of the @bwa-mem@ components are available from the last status report shown above:
 
 <notextile>
 <pre><code>~$ <span class="userinput">arv keep ls -s 49bae1066f4ebce72e2587a3efa61c7d+88</span>
index 0e8b997f9f44dda73633b45122465c8950133f28..92659cede848c3030c0fcf7da2cda2041f67205b 100644 (file)
@@ -34,7 +34,7 @@ EOF
 * @cat@ is a standard Unix utility that writes a sequence of input to standard output.
 * @<<EOF@ tells the shell to direct the following lines into the standard input for @cat@ up until it sees the line @EOF@.
 * @>~/the_job@ redirects standard output to a file called @~/the_job@.
-* @"repository"@ is the name of a Git repository to search for the script version.  You can access a list of available git repositories on the Arvados Workbench under "*Code repositories*":https://{{site.arvados_workbench_host}}/repositories.
+* @"repository"@ is the name of a Git repository to search for the script version.  You can access a list of available git repositories on the Arvados Workbench under "*Code repositories*":{{site.arvados_workbench_host}}/repositories.
 * @"script_version"@ specifies the version of the script that you wish to run.  This can be in the form of an explicit Git revision hash, a tag, or a branch.  Arvados logs the script version that was used in the run, enabling you to go back and re-run any past job with the guarantee that the exact same code will be used as was used in the previous run.
 * @"script"@ specifies the name of the script to run.  The script must be given relative to the @crunch_scripts/@ subdirectory of the Git repository.
 * @"script_parameters"@ are provided to the script.  In this case, the input is the PGP data Collection that we "put in Keep earlier":{{site.baseurl}}/user/tutorials/tutorial-keep.html.
@@ -85,11 +85,11 @@ The job is now queued and will start running as soon as it reaches the front of
 
 h2. Monitor job progress
 
-Go to "*Recent jobs*":https://{{site.arvados_workbench_host}}/jobs in Workbench.  Your job should be near the top of the table.  This table refreshes automatically.  When the job has completed successfully, it will show <span class="label label-success">finished</span> in the *Status* column.
+Go to "*Recent jobs*":{{site.arvados_workbench_host}}/jobs in Workbench.  Your job should be near the top of the table.  This table refreshes automatically.  When the job has completed successfully, it will show <span class="label label-success">finished</span> in the *Status* column.
 
 h2. Inspect the job output
 
-On the "Workbench Dashboard":https://{{site.arvados_workbench_host}}, look for the *Output* column of the *Recent jobs* table.  Click on the link under *Output* for your job to go to the files page with the job output.  The files page lists all the files that were output by the job.  Click on the link under the *file* column to view a file, or click on the download button <span class="glyphicon glyphicon-download-alt"></span> to download the output file.
+On the "Workbench Dashboard":{{site.arvados_workbench_host}}, look for the *Output* column of the *Recent jobs* table.  Click on the link under *Output* for your job to go to the files page with the job output.  The files page lists all the files that were output by the job.  Click on the link under the *file* column to view a file, or click on the download button <span class="glyphicon glyphicon-download-alt"></span> to download the output file.
 
 On the command line, you can use @arv job get@ to access a JSON object describing the output:
 
@@ -156,7 +156,7 @@ This MD5 hash matches the MD5 hash which we "computed earlier":{{site.baseurl}}/
 
 h2. The job log
 
-When the job completes, you can access the job log.  On the Workbench, visit "*Recent jobs*":https://{{site.arvados_workbench_host}}/jobs %(rarr)&rarr;% your job's UUID under the *uuid* column %(rarr)&rarr;% the collection link on the *log* row.
+When the job completes, you can access the job log.  On the Workbench, visit "*Recent jobs*":{{site.arvados_workbench_host}}/jobs %(rarr)&rarr;% your job's UUID under the *uuid* column %(rarr)&rarr;% the collection link on the *log* row.
 
 On the command line, the Keep identifier listed in the @"log"@ field from @arv job get@ specifies a collection.  You can list the files in the collection:
 
index 9be610358bb5f8133c6f7269c390799e04de8e5d..6d0058b5e950e8c1b0866158ee815a859fcef4a4 100644 (file)
@@ -4,7 +4,7 @@ navsection: userguide
 title: "Concurrent Crunch tasks"
 ...
 
-In the previous tutorials, we used @arvados.job_setup.one_task_per_input_file()@ to automatically create concurrent jobs by creating a separate task per file.  For some types of jobs, you may need to split the work up differently, for example creating tasks to process different segments of a single large file.  In this this tutorial will demonstrate how to create Crunch tasks directly.
+In the previous tutorials, we used @arvados.job_setup.one_task_per_input_file()@ to automatically create concurrent jobs by creating a separate task per file.  For some types of jobs, you may need to split the work up differently, for example creating tasks to process different segments of a single large file.  This tutorial will demonstrate how to create Crunch tasks directly.
 
 Start by entering the @crunch_scripts@ directory of your Git repository:
 
@@ -40,7 +40,7 @@ You should now be able to run your new script using Crunch, with "script" referr
 <pre><code>~/$USER/crunch_scripts$ <span class="userinput">cat &gt;~/the_job &lt;&lt;EOF
 {
  "script": "concurrent-hash.py",
- "repository": "$USER",
+ "repository": "$USER/$USER",
  "script_version": "master",
  "script_parameters":
  {
diff --git a/doc/user/tutorials/add-new-repository.html.textile.liquid b/doc/user/tutorials/add-new-repository.html.textile.liquid
new file mode 100644 (file)
index 0000000..84de5ff
--- /dev/null
@@ -0,0 +1,42 @@
+---
+layout: default
+navsection: userguide
+title: Adding a new Arvados git repository
+...
+
+Arvados repositories are managed through the Git revision control system. You can use these repositories to store your crunch scripts and run them on the Arvados cluster.
+
+{% include 'tutorial_expectations' %}
+
+h2. Setting up Git
+
+Before you start using Git and Arvados repositories, you should do some basic configuration (you only need to do this the first time):
+
+<notextile>
+<pre><code>~$ <span class="userinput">git config --global user.name "Your Name"</span>
+~$ <span class="userinput">git config --global user.email $USER@example.com</span></code></pre>
+</notextile>
+
+h2. Add "tutorial" repository
+
+On the Arvados Workbench, click on the dropdown menu icon <span class="fa fa-lg fa-user"></span> <span class="caret"></span> in the upper right corner of the top navigation menu to access the user settings menu, and click on the menu item *Repositories*.
+
+In the *Repositories* page, you will see the *Add new repository* button.
+
+!{display: block;margin-left: 25px;margin-right: auto;}{{ site.baseurl }}/images/repositories-panel.png!
+
+Click the *Add new repository* button to open the popup to add a new Arvados repository. You will see a text box where you can enter the name of the repository. Enter *tutorial* in this text box and click on *Create*.
+
+{% include 'notebox_begin' %}
+The name you enter here must begin with a letter and can only contain alphanumeric characters.
+{% include 'notebox_end' %}
+
+!{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/add-new-repository.png!
+
+This will create a new repository with the name @$USER/tutorial@. It can be accessed using the URL <notextile><code>https://git.{{ site.arvados_api_host }}/$USER/tutorial.git</code></notextile> or <notextile><code>git@git.{{ site.arvados_api_host }}:$USER/tutorial.git</code></notextile>
+
+Back in the *Repositories* page, you should see the @$USER/tutorial@ repository listed in the name column with these URLs.
+
+!{display: block;margin-left: 25px;margin-right: auto;}{{ site.baseurl }}/images/added-new-repository.png!
+
+You are now ready to use this *tutorial* repository to run your crunch scripts.
diff --git a/doc/user/tutorials/git-arvados-guide.html.textile.liquid b/doc/user/tutorials/git-arvados-guide.html.textile.liquid
new file mode 100644 (file)
index 0000000..a46a1d9
--- /dev/null
@@ -0,0 +1,89 @@
+---
+layout: default
+navsection: userguide
+title: Working with an Arvados git repository
+...
+
+This tutorial describes how to work with a new Arvados git repository. Working with an Arvados git repository is analogous to working with other public git repositories. It will show you how to upload custom scripts to a remote Arvados repository, so you can use them in Arvados pipelines.
+
+{% include 'tutorial_expectations' %}
+
+{% include 'tutorial_git_repo_expectations' %}
+
+{% include 'notebox_begin' %}
+For more information about using Git, try
+<notextile>
+<pre><code>$ <span class="userinput">man gittutorial</span></code></pre>
+</notextile> or *"search Google for Git tutorials":http://google.com/#q=git+tutorial*.
+{% include 'notebox_end' %}
+
+h2. Cloning an Arvados repository
+
+Before you start using Git, you should do some basic configuration (you only need to do this the first time):
+
+<notextile>
+<pre><code>~$ <span class="userinput">git config --global user.name "Your Name"</span>
+~$ <span class="userinput">git config --global user.email $USER@example.com</span></code></pre>
+</notextile>
+
+On the Arvados Workbench, click on the dropdown menu icon <span class="fa fa-lg fa-user"></span> <span class="caret"></span> in the upper right corner of the top navigation menu to access the user settings menu, and click on the menu item *Repositories*. In the *Repositories* page, you should see the @$USER/tutorial@ repository listed in the *name* column.  Next to *name* is the column *URL*. Copy the *URL* value associated with your repository.  This should look like <notextile><code>https://git.{{ site.arvados_api_host }}/$USER/tutorial.git</code></notextile>. Alternatively, you can use <notextile><code>git@git.{{ site.arvados_api_host }}:$USER/tutorial.git</code></notextile>
+
+Next, on the Arvados virtual machine, clone your Git repository:
+
+<notextile>
+<pre><code>~$ <span class="userinput">cd $HOME</span> # (or wherever you want to install)
+~$ <span class="userinput">git clone https://git.{{ site.arvados_api_host }}/$USER/tutorial.git</span>
+Cloning into 'tutorial'...</code></pre>
+</notextile>
+
+This will create a Git repository in the directory called @tutorial@ in your home directory. Say yes when prompted to continue with the connection.
+Ignore any warning that you are cloning an empty repository.
+
+*Note:* If you are prompted for a username and password when you try to git clone using this command, you may first need to update your git configuration.  Execute the following commands to do so:
+
+<notextile>
+<pre>
+<code>~$ <span class="userinput">git config 'credential.https://git.{{ site.arvados_api_host }}/.username' none</span></code>
+<code>~$ <span class="userinput">git config 'credential.https://git.{{ site.arvados_api_host }}/.helper' '!cred(){ cat >/dev/null; if [ "$1" = get ]; then echo password=$ARVADOS_API_TOKEN; fi; };cred'</span></code>
+</pre>
+</notextile>
+
+h2. Creating a git branch in an Arvados repository
+
+Create a git branch named *tutorial_branch* in the *tutorial* Arvados git repository.
+
+<notextile>
+<pre><code>~$ <span class="userinput">cd tutorial</span>
+~/tutorial$ <span class="userinput">git checkout -b tutorial_branch</span>
+</code></pre>
+</notextile>
+
+h2. Adding files or scripts to an Arvados repository
+
+First, create a file named *tutorial.txt* in the local repository. Although this tutorial shows how to add a text file to Arvados, the same steps can be used to add any of your custom Bash, R, or Python scripts to an Arvados repository.
+
+<notextile>
+<pre><code>~$ <span class="userinput">echo 'hello world' > tutorial.txt</span>
+</code></pre>
+</notextile>
+
+Next, add the new file to the git index.
+
+<notextile>
+<pre><code>~$ <span class="userinput">git add tutorial.txt</span>
+</code></pre>
+</notextile>
+
+Next, commit all the changes to the local repository, along with a commit message that describes what this script does.
+
+<notextile>
+<pre><code>~$ <span class="userinput">git commit -a -m "Added tutorial.txt"</span>
+</code></pre>
+</notextile>
+
+Finally, push the changes in the local repository to the remote repository.
+
+<notextile>
+<pre><code>~$ <span class="userinput">git push origin tutorial_branch</span>
+</code></pre>
+</notextile>
index 18f5f7d35f75eabeb2c45be8744846572bc7d548..accc47c3d8e089a89768b3f90605ade643b86357 100644 (file)
@@ -21,12 +21,13 @@ This will open the template record in an interactive text editor (as specified b
 * @"name"@ is a human-readable name for the pipeline.
 * @"components"@ is a set of scripts or commands that make up the pipeline.  Each component is given an identifier (@"bwa-mem"@ and @"SortSam"@) in this example).
 ** Each entry in components @"components"@ is an Arvados job submission.  For more information about individual jobs, see the "job object reference":{{site.baseurl}}/api/schema/Job.html and "job create method.":{{site.baseurl}}/api/methods/jobs.html#create
-* @"repository"@, @"script_version"@, and @"script"@ indicate that we intend to use the external @"run-command"@ tool wrapper that is part of the Arvados.  These parameters are described in more detail in "Writing a script":tutorial-firstscript.html
+* @"repository"@, @"script_version"@, and @"script"@ indicate that we intend to use the external @"run-command"@ tool wrapper that is part of the Arvados.  These parameters are described in more detail in "Writing a script":tutorial-firstscript.html.
 * @"runtime_constraints"@ describes runtime resource requirements for the component.
-** @"docker_image"@ specifies the "Docker":https://www.docker.com/ runtime environment in which to run the job.  The Docker image @"arvados/jobs-java-bwa-samtools"@ supplied here has the Arvados SDK, Java runtime environment, bwa, and samtools installed.
+** @"docker_image"@ specifies the "Docker":https://www.docker.com/ runtime environment in which to run the job.  The Docker image @"bcosc/arv-base-java"@ supplied here has the Java runtime environment, bwa, and samtools installed.
+** @"arvados_sdk_version"@ specifies a version of the Arvados SDK to load alongside the job's script. The example uses 'master'. If you would like to use a specific version of the sdk, you can find it in the "Arvados Python sdk repository":https://arvados.org/projects/arvados/repository/revisions/master/show/sdk/python under *Latest revisions*.
 * @"script_parameters"@ describes the component parameters.
 ** @"command"@ is the actual command line to invoke the @bwa@ and then @SortSam@.  The notation @$()@ denotes macro substitution commands evaluated by the run-command tool wrapper.
-** @"stdout"@ indicates that the output of this command should be captured to a file.
+** @"task.stdout"@ indicates that the output of this command should be captured to a file.
 ** @$(node.cores)@ evaluates to the number of cores available on the compute node at the time the command is run.
 ** @$(tmpdir)@ evaluates to the local path of the temporary directory the command should use for scratch data.
 ** @$(reference_collection)@ evaluates to the script_parameter @"reference_collection"@
@@ -34,19 +35,44 @@ This will open the template record in an interactive text editor (as specified b
 ** @$(file $(...))@ constructs a local path to a given file within the supplied Arvados collection.
 ** @$(glob $(...))@ searches the specified path based on a file glob pattern and evaluates to the first result.
 ** @$(basename $(...))@ evaluates to the supplied path with the leading path portion and trailing filename extensions stripped.
-** @"output_of"@ indicates that the @output@ of the @bwa-mem@ component should be used as the @"input"@ of @SortSam@.  Arvados uses these dependencies between components to automatically determine the correct order to run them.
+* @"output_of"@ indicates that the @output@ of the @bwa-mem@ component should be used as the @"input"@ script parameter of @SortSam@.  Arvados uses these dependencies between components to automatically determine the correct order to run them.
 
 When using @run-command@, the tool should write its output to the current working directory.  The output will be automatically uploaded to Keep when the job completes.
 
 See the "run-command reference":{{site.baseurl}}/user/topics/run-command.html for more information about using @run-command@.
 
+*Note:* To achieve job reproducibility without re-computation, you need to pin these parameters to specific hashes. Using a symbolic version such as @master@ in @"arvados_sdk_version"@ will resolve to the latest version hash, which may cause Arvados to re-compute your job when the SDK is updated.
+* @"arvados_sdk_version"@ : The latest version can be found on the "Arvados Python sdk repository":https://arvados.org/projects/arvados/repository/revisions/master/show/sdk/python under *Latest revisions*.
+* @"script_version"@ : The current version of your script in your git repository can be found by using the following command:
+
+<notextile>
+<pre><code>~$ <span class="userinput">git rev-parse HEAD</span></code></pre>
+</notextile>
+
+* @"docker_image"@ : The docker image hash used is found on the "Collection page":https://cloud.curoverse.com/collections/qr1hi-4zz18-dov6im679g3jr1n as the *Content address*.
+
 h2. Running your pipeline
 
-Your new pipeline template should appear at the top of the Workbench "pipeline&nbsp;templates":https://{{ site.arvados_workbench_host }}/pipeline_templates page.  You can run your pipeline "using Workbench":tutorial-pipeline-workbench.html or the "command line.":{{site.baseurl}}/user/topics/running-pipeline-command-line.html
+Your new pipeline template should appear at the top of the Workbench "pipeline&nbsp;templates":{{site.arvados_workbench_host}}/pipeline_templates page.  You can run your pipeline "using Workbench":tutorial-pipeline-workbench.html or the "command line.":{{site.baseurl}}/user/topics/running-pipeline-command-line.html
 
-Test data is available in the "Arvados Tutorial":https://{{ site.arvados_workbench_host }}/projects/qr1hi-j7d0g-u7zg1qdaowykd8d project:
+Test data is available in the "Arvados Tutorial":{{site.arvados_workbench_host}}/projects/qr1hi-j7d0g-u7zg1qdaowykd8d project:
 
-* Choose <i class="fa fa-fw fa-archive"></i> "Tutorial chromosome 19 reference (2463fa9efeb75e099685528b3b9071e0+438)":https://{{ site.arvados_workbench_host }}/collections/2463fa9efeb75e099685528b3b9071e0+438 for the "reference_collection" parameter
-* Choose <i class="fa fa-fw fa-archive"></i> "Tutorial sample exome (3229739b505d2b878b62aed09895a55a+142)":https://{{ site.arvados_workbench_host }}/collections/3229739b505d2b878b62aed09895a55a+142 for the "sample" parameter
+* Choose <i class="fa fa-fw fa-archive"></i> "Tutorial chromosome 19 reference (2463fa9efeb75e099685528b3b9071e0+438)":{{site.arvados_workbench_host}}/collections/2463fa9efeb75e099685528b3b9071e0+438 for the "reference_collection" parameter
+* Choose <i class="fa fa-fw fa-archive"></i> "Tutorial sample exome (3229739b505d2b878b62aed09895a55a+142)":{{site.arvados_workbench_host}}/collections/3229739b505d2b878b62aed09895a55a+142 for the "sample" parameter
 
 For more information and examples for writing pipelines, see the "pipeline template reference":{{site.baseurl}}/api/schema/PipelineTemplate.html
+
+h2. Re-using your pipeline run
+
+Arvados allows users to re-use jobs that have the same inputs in order to save computing time and resources. This makes it possible to change a downstream job without re-computing the earlier jobs it depends on. This section shows which version control parameters must be pinned to ensure that Arvados does not re-compute your jobs.
+
+Note: Job reuse can only happen if none of the input collections have changed.
+
+* @"arvados_sdk_version"@ : The arvados_sdk_version parameter is used to download the specific version of the Arvados sdk into the docker image. The latest version can be found in the "Arvados Python sdk repository":https://arvados.org/projects/arvados/repository/revisions/master/show/sdk/python under *Latest revisions*. Make sure you set this to the same version as the previous run that you are trying to reuse.
+* @"script_version"@ : The script_version is the commit hash of the git branch that the crunch script resides in. This information can be found in your git repository by using the following command:
+
+<notextile>
+<pre><code>~$ <span class="userinput">git rev-parse HEAD</span></code></pre>
+</notextile>
+
+* @"docker_image"@ : This specifies the "Docker":https://www.docker.com/ runtime environment where jobs run their scripts. Docker version control is similar to git, and you can commit and push changes to your images. You must re-use the docker image hash from the previous run to use the same image. It can be found on the "Collection page":https://cloud.curoverse.com/collections/qr1hi-4zz18-dov6im679g3jr1n as the *Content address* or the *docker_image_locator* in a job's metadata.
index 6fe88fe156c5a78577b9a271730e7da938d721a6..bf73c8cc1943dce1bd22f9df03756edb947a8111 100644 (file)
@@ -11,10 +11,11 @@ This tutorial demonstrates how to write a script using Arvados Python SDK.  The
 
 This tutorial uses @$USER@ to denote your username.  Replace @$USER@ with your user name in all the following examples.
 
-Start by creating a directory called @$USER@ .  Next, create a subdirectory called @crunch_scripts@ and change to that directory:
+Start by creating a directory called @tutorial@ in your home directory.  Next, create a subdirectory called @crunch_scripts@ and change to that directory:
 
 <notextile>
-<pre><code>~$ <span class="userinput">mkdir -p tutorial/crunch_scripts</span>
+<pre><code>~$ <span class="userinput">cd $HOME</span>
+~$ <span class="userinput">mkdir -p tutorial/crunch_scripts</span>
 ~$ <span class="userinput">cd tutorial/crunch_scripts</span></code></pre>
 </notextile>
 
@@ -82,23 +83,23 @@ You can now run your script on your local workstation or VM using @arv-crunch-jo
 2014-08-06_15:16:35 qr1hi-8i9sb-qyrat80ef927lam 14473  release job allocation
 2014-08-06_15:16:35 qr1hi-8i9sb-qyrat80ef927lam 14473  Freeze not implemented
 2014-08-06_15:16:35 qr1hi-8i9sb-qyrat80ef927lam 14473  collate
-2014-08-06_15:16:36 qr1hi-8i9sb-qyrat80ef927lam 14473  output uuid qr1hi-4zz18-n91qrqfp3zivexo
+2014-08-06_15:16:36 qr1hi-8i9sb-qyrat80ef927lam 14473  collated output manifest text to send to API server is 105 bytes with access tokens
 2014-08-06_15:16:36 qr1hi-8i9sb-qyrat80ef927lam 14473  output hash c1b44b6dc41ef334cf1136033ca950e6+54
 2014-08-06_15:16:37 qr1hi-8i9sb-qyrat80ef927lam 14473  finish
 2014-08-06_15:16:38 qr1hi-8i9sb-qyrat80ef927lam 14473  log manifest is 7fe8cf1d45d438a3ca3ac4a184b7aff4+83
 </code></pre>
 </notextile>
 
-Although the job runs locally, the output of the job has been saved to Keep, the Arvados file store.  The "output uuid" line (fourth from the bottom) provides the UUID of the Arvados collection where the script's output has been saved.  Copy the output identifier and use @arv-ls@ to list the contents of your output collection, and @arv-get@ to download it to the current directory:
+Although the job runs locally, the output of the job has been saved to Keep, the Arvados file store.  The "output hash" line (third from the bottom) provides the portable data hash of the Arvados collection where the script's output has been saved.  Copy the output hash and use @arv-ls@ to list the contents of your output collection, and @arv-get@ to download it to the current directory:
 
 <notextile>
-<pre><code>~/tutorial/crunch_scripts$ <span class="userinput">arv-ls qr1hi-4zz18-n91qrqfp3zivexo</span>
+<pre><code>~/tutorial/crunch_scripts$ <span class="userinput">arv-ls c1b44b6dc41ef334cf1136033ca950e6+54</span>
 ./md5sum.txt
-~/tutorial/crunch_scripts$ <span class="userinput">arv-get qr1hi-4zz18-n91qrqfp3zivexo/ .</span>
+~/tutorial/crunch_scripts$ <span class="userinput">arv-get c1b44b6dc41ef334cf1136033ca950e6+54/ .</span>
 0 MiB / 0 MiB 100.0%
 ~/tutorial/crunch_scripts$ <span class="userinput">cat md5sum.txt</span>
 44b8ae3fde7a8a88d2f7ebd237625b4f c1bad4b39ca5a924e481008009d94e32+210/var-GS000016015-ASM.tsv.bz2
 </code></pre>
 </notextile>
 
-Running locally is convenient for development and debugging, as it permits a fast iterative development cycle.  Your job run is also recorded by Arvados, and will appear in the *Recent jobs and pipelines* panel on the "Workbench Dashboard":https://{{site.arvados_workbench_host}}.  This provides limited provenance, by recording the input parameters, the execution log, and the output.  However, running locally does not allow you to scale out to multiple nodes, and does not store the complete system snapshot required to achieve reproducibility; to do that you need to "submit a job to the Arvados cluster":{{site.baseurl}}/user/tutorials/tutorial-submit-job.html.
+Running locally is convenient for development and debugging, as it permits a fast iterative development cycle.  Your job run is also recorded by Arvados, and will appear in the *Recent jobs and pipelines* panel on the "Workbench Dashboard":{{site.arvados_workbench_host}}.  This provides limited provenance, by recording the input parameters, the execution log, and the output.  However, running locally does not allow you to scale out to multiple nodes, and does not store the complete system snapshot required to achieve reproducibility; to do that you need to "submit a job to the Arvados cluster":{{site.baseurl}}/user/tutorials/tutorial-submit-job.html.
index 1f980edcd0c55c56a16b4ec8bc44a90363a392a1..1e894e7c25141ba00aa722ad2adef22fd508e37f 100644 (file)
@@ -74,6 +74,6 @@ This will create a sharing link for the collection as shown below. You can copy
 
 !{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/shared-collection.png!
 
-A user with this url can download this collection by simply accessing this url. It will present a downloadable version of the collection as shown below.
+A user with this URL can download this collection by simply accessing this URL in a web browser. It will present a downloadable version of the collection as shown below.
 
 !{display: block;margin-left: 25px;margin-right: auto;border:1px solid lightgray;}{{ site.baseurl }}/images/download-shared-collection.png!
index a41fede744e92dfaab1b43782a81121d57fff94b..c25cb65ea837cf233a0421ae0251e95aa727a0d7 100644 (file)
@@ -14,7 +14,7 @@ h2. Arv-mount
 
 * You can browse, open and read Keep entries as if they are regular files.
 * It is easy for existing tools to access files in Keep.
-* Data is downloaded on demand.  It is not necessary to download an entire file or collection to start processing.
+* Data is streamed on demand.  It is not necessary to download an entire file or collection to start processing.
 
 The default mode permits browsing any collection in Arvados as a subdirectory under the mount directory.  To avoid having to fetch a potentially large list of all collections, collection directories only come into existence when explicitly accessed by their Keep locator. For instance, a collection may be found by its content hash in the @keep/by_id@ directory.
 
@@ -34,3 +34,23 @@ var-GS000016015-ASM.tsv.bz2
 The last line unmounts Keep.  Subdirectories will no longer be accessible.
 
 Within each directory on Keep, there is a @.arvados#collection@ file that does not show up with @ls@. Its contents include, for instance, the @portable_data_hash@, which is the same as the Keep locator.
+
+h3. Modifying files and directories in Keep
+
+By default, all files in the Keep mount are read only.  However, @arv-mount --enable-write@ enables you to perform the following operations using normal Unix command line tools (@touch@, @mv@, @rm@, @mkdir@, @rmdir@) and your own programs using standard POSIX file system APIs:
+
+* Create, update, rename and delete individual files within collections
+* Create and delete subdirectories inside collections
+* Move files and directories within and between collections
+* Create and delete collections within a project (using @mkdir@ and @rmdir@ in a project directory)
+
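+For example, with a writable mount (assuming a mount point of @~/keep@; the collection and file names are illustrative):
+
+<notextile>
+<pre><code>~$ <span class="userinput">arv-mount --enable-write ~/keep</span>
+~$ <span class="userinput">mkdir ~/keep/home/"new collection"</span>
+~$ <span class="userinput">echo hello > ~/keep/home/"new collection"/hello.txt</span>
+</code></pre>
+</notextile>
+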
+Not supported:
+
+* Symlinks, hard links
+* Changing permissions
+* Extended attributes
+* Moving a subdirectory of a collection into a project, or moving a collection from a project into another collection
+
+If multiple clients (separate instances of arv-mount or other arvados applications) modify the same file in the same collection within a short time interval, this may result in a conflict.  In this case, the most recent commit wins, and the "loser" will be renamed to a conflict file in the form @name~YYYYMMDD-HHMMSS~conflict~@.
+
+Please note this feature is in beta testing.  In particular, the conflict mechanism is itself currently subject to race conditions with potential for data loss when a collection is being modified simultaneously by multiple clients.  This issue will be resolved in future development.
index 0d8dfb344d5b939d12317c3be2b2c3d922a213a5..5f212573bc6e7062b544adff8d8e8046179b51e3 100644 (file)
@@ -4,7 +4,7 @@ navsection: userguide
 title: "Uploading data"
 ...
 
-Arvados Data collections can be uploaded using either the @*arv keep put*@ command line tool or using Workbench.
+Arvados Data collections can be uploaded using either the @arv keep put@ command line tool or using Workbench.
 
 # "*Upload using command line tool*":#upload-using-command
 # "*Upload using Workbench*":#upload-using-workbench
@@ -24,9 +24,10 @@ qr1hi-4zz18-xxxxxxxxxxxxxxx
 </code></pre>
 </notextile>
 
 The output value @qr1hi-4zz18-xxxxxxxxxxxxxxx@ is the uuid of the Arvados collection created.
 
-The file used in this example is a freely available TSV file containing variant annotations from "Personal Genome Project (PGP)":http://www.pgp-hms.org participant "hu599905.":https://my.pgp-hms.org/profile/hu599905), downloadable "here":https://warehouse.pgp-hms.org/warehouse/f815ec01d5d2f11cb12874ab2ed50daa+234+K@ant/var-GS000016015-ASM.tsv.bz2.
+Note: The file used in this example is a freely available TSV file containing variant annotations from the "Personal Genome Project (PGP)":http://www.pgp-hms.org participant "hu599905":https://my.pgp-hms.org/profile/hu599905, downloadable "here":https://warehouse.pgp-hms.org/warehouse/f815ec01d5d2f11cb12874ab2ed50daa+234+K@ant/var-GS000016015-ASM.tsv.bz2. Alternatively, you can replace @var-GS000016015-ASM.tsv.bz2@ with the name of any file you have locally, or you could get the TSV file by "downloading it from Keep.":{{site.baseurl}}/user/tutorials/tutorial-keep-get.html
 
 <notextile><a name="dir"></a></notextile>It is also possible to upload an entire directory with @arv keep put@:
 
@@ -48,9 +49,9 @@ In both examples, the @arv keep put@ command created a collection. The first col
 
 h3. Locate your collection in Workbench
 
-Visit the Workbench *Dashboard*.  Click on *Projects*<span class="caret"></span> dropdown menu in the top navigation menu, select your *Home* project.  Your newly uploaded collection should appear near the top of the *Data collections* tab.  The collection locator printed by @arv keep put@ will appear under the *name* column.
+Visit the Workbench *Dashboard*.  Click on *Projects*<span class="caret"></span> dropdown menu in the top navigation menu, select your *Home* project.  Your newly uploaded collection should appear near the top of the *Data collections* tab.  The collection name printed by @arv keep put@ will appear under the *name* column.
 
-To move the collection to a different project, check the box at the left of the collection row.  Pull down the *Selection...*<span class="caret"></span> menu near the top of the page tab, and select *Move selected*. This will open a dialog box where you can select a destination project for the collection.  Click a project, then finally the <span class="btn btn-sm btn-primary">Move</span> button.
+To move the collection to a different project, check the box at the left of the collection row.  Pull down the *Selection...*<span class="caret"></span> menu near the top of the page tab, and select *Move selected...*. This will open a dialog box where you can select a destination project for the collection.  Click a project, then finally the <span class="btn btn-sm btn-primary">Move</span> button.
 
 !{display: block;margin-left: 25px;margin-right: auto;}{{ site.baseurl }}/images/workbench-move-selected.png!
 
@@ -76,4 +77,4 @@ Click on the *Browse...* button and select the files you would like to upload. S
 
 *Note:* If you leave the collection page during the upload, the upload process will be aborted and you will need to upload the files again.
 
-*Note:* You can also use the Upload tab to add files to an existing collection.
+*Note:* You can also use the Upload tab to add additional files to an existing collection.
index 8dad6ab25e11de078f014113ef808ff57fe4109c..f9522fbef01658f68b20493516b823c2e2d3611f 100644 (file)
@@ -6,7 +6,7 @@ title: "Running a pipeline using Workbench"
 
 A "pipeline" (sometimes called a "workflow" in other systems) is a sequence of steps that apply various programs or tools to transform input data to output data.  Pipelines are the principal means of performing computation with Arvados.  This tutorial demonstrates how to run a single-stage pipeline to take a small data set of paired-end reads from a sample "exome":https://en.wikipedia.org/wiki/Exome in "FASTQ":https://en.wikipedia.org/wiki/FASTQ_format format and align them to "Chromosome 19":https://en.wikipedia.org/wiki/Chromosome_19_%28human%29 using the "bwa mem":http://bio-bwa.sourceforge.net/ tool, producing a "Sequence Alignment/Map (SAM)":https://samtools.github.io/ file.  This tutorial will introduce the following Arvados features:
 
-<div class="inside-list">
+<div>
 * How to create a new pipeline from an existing template.
 * How to browse and select input data for the pipeline and submit the pipeline to run on the Arvados cluster.
 * How to access your pipeline results.
@@ -14,13 +14,15 @@ A "pipeline" (sometimes called a "workflow" in other systems) is a sequence of s
 
 notextile. <div class="spaced-out">
 
+h3. Steps
+
 # Start from the *Workbench Dashboard*.  You can access the Dashboard by clicking on *<i class="fa fa-lg fa-fw fa-dashboard"></i> Dashboard* in the upper left corner of any Workbench page.
 # Click on the <span class="btn btn-sm btn-primary"><i class="fa fa-fw fa-gear"></i> Run a pipeline...</span> button.  This will open a dialog box titled *Choose a pipeline to run*.
 # Click to open the *All projects <span class="caret"></span>* menu.  Under the *Projects shared with me* header, select *<i class="fa fa-fw fa-share-alt"></i> Arvados Tutorial*.
 # Select *<i class="fa fa-fw fa-gear"></i> Tutorial align using bwa mem* and click the <span class="btn btn-sm btn-primary" >Next: choose inputs <i class="fa fa-fw fa-arrow-circle-right"></i></span> button.  This will create a new pipeline in your *Home* project and will open it. You can now supply the inputs for the pipeline.
-# The first input parameter to the pipeline is *Reference genoma (fasta)*.  Click the <span class="btn btn-sm btn-primary">Choose</span> button beneath that header.  This will open a dialog box titled *Choose a dataset for Reference genome (fasta)*.
+# The first input parameter to the pipeline is *"reference_collection" parameter for run-command script in bwa-mem component*.  Click the <span class="btn btn-sm btn-primary">Choose</span> button beneath that header.  This will open a dialog box titled *Choose a dataset for "reference_collection" parameter for run-command script in bwa-mem component*.
 # Once again, open the *All projects <span class="caret"></span>* menu and select *<i class="fa fa-fw fa-share-alt"></i> Arvados Tutorial*.  Select *<i class="fa fa-fw fa-archive"></i> Tutorial chromosome 19 reference* and click the <span class="btn btn-sm btn-primary" >OK</span> button.
-# Repeat the previous two steps to set the *Input genome (fastq)* parameter to *<i class="fa fa-fw fa-archive"></i> Tutorial sample exome*.
+# Repeat the previous two steps to set the *"sample" parameter for run-command script in bwa-mem component* parameter to *<i class="fa fa-fw fa-archive"></i> Tutorial sample exome*.
 # Click on the <span class="btn btn-sm btn-primary" >Run <i class="fa fa-fw fa-play"></i></span> button.  The page updates to show you that the pipeline has been submitted to run on the Arvados cluster.
 # After the pipeline starts running, you can track the progress by watching log messages from jobs.  This page refreshes automatically.  You will see a <span class="label label-success">complete</span> label under the *job* column when the pipeline completes successfully.
 # Click on the *Output* link to see the results of the job.  This will load a new page listing the output files from this pipeline.  You'll see the output SAM file from the alignment tool under the *Files* tab.
index fc77e5cdc02162f4d42a997cbb644d7323e4adcc..b17f951e74c84867e63de156557efea21ff308f9 100644 (file)
@@ -11,49 +11,25 @@ This tutorial demonstrates how to create a pipeline to run your crunch script on
 
 This tutorial uses @$USER@ to denote your username.  Replace @$USER@ with your user name in all the following examples.
 
-h2. Setting up Git
+Also, this tutorial uses the @tutorial@ Arvados repository created in "Adding a new Arvados repository":add-new-repository.html as the example repository.
 
-All Crunch scripts are managed through the Git revision control system.  Before you start using Git, you should do some basic configuration (you only need to do this the first time):
+h2. Clone Arvados repository
 
-<notextile>
-<pre><code>~$ <span class="userinput">git config --global user.name "Your Name"</span>
-~$ <span class="userinput">git config --global user.email $USER@example.com</span></code></pre>
-</notextile>
-
-On the Arvados Workbench, navigate to "Code repositories":https://{{site.arvados_workbench_host}}/repositories.  You should see a repository with your user name listed in the *name* column.  Next to *name* is the column *push_url*.  Copy the *push_url* value associated with your repository.  This should look like <notextile><code>git@git.{{ site.arvados_api_host }}:$USER.git</code></notextile>.
-
-Next, on the Arvados virtual machine, clone your Git repository:
-
-<notextile>
-<pre><code>~$ <span class="userinput">cd $HOME</span> # (or wherever you want to install)
-~$ <span class="userinput">git clone git@git.{{ site.arvados_api_host }}:$USER.git</span>
-Cloning into '$USER'...</code></pre>
-</notextile>
-
-This will create a Git repository in the directory called @$USER@ in your home directory. Say yes when prompted to continue with connection.
-Ignore any warning that you are cloning an empty repository.
-
-{% include 'notebox_begin' %}
-For more information about using Git, try
-
-notextile. <pre><code>$ <span class="userinput">man gittutorial</span></code></pre>
-
-or *"search Google for Git tutorials":http://google.com/#q=git+tutorial*.
-{% include 'notebox_end' %}
+If you have not already done so, clone the *tutorial* repository using the instructions in "Working with an Arvados git repository":git-arvados-guide.html.
 
 h2. Creating a Crunch script
 
-Start by entering the @$USER@ directory created by @git clone@.  Next create a subdirectory called @crunch_scripts@ and change to that directory:
+Start by entering the @tutorial@ directory created by @git clone@. Next, create a subdirectory called @crunch_scripts@ and change to that directory:
 
 <notextile>
-<pre><code>~$ <span class="userinput">cd $USER</span>
-~/$USER$ <span class="userinput">mkdir crunch_scripts</span>
-~/$USER$ <span class="userinput">cd crunch_scripts</span></code></pre>
+<pre><code>~$ <span class="userinput">cd tutorial</span>
+~/tutorial$ <span class="userinput">mkdir crunch_scripts</span>
+~/tutorial$ <span class="userinput">cd crunch_scripts</span></code></pre>
 </notextile>
 
 Next, using @nano@ or your favorite Unix text editor, create a new file called @hash.py@ in the @crunch_scripts@ directory.
 
-notextile. <pre>~/$USER/crunch_scripts$ <code class="userinput">nano hash.py</code></pre>
+notextile. <pre>~/tutorial/crunch_scripts$ <code class="userinput">nano hash.py</code></pre>
 
 Add the following code to compute the MD5 hash of each file in a collection (if you already completed "Writing a Crunch script":tutorial-firstscript.html you can just copy the @hash.py@ file you created previously.)
 
@@ -61,16 +37,16 @@ Add the following code to compute the MD5 hash of each file in a collection (if
 
 Make the file executable:
 
-notextile. <pre><code>~/$USER/crunch_scripts$ <span class="userinput">chmod +x hash.py</span></code></pre>
+notextile. <pre><code>~/tutorial/crunch_scripts$ <span class="userinput">chmod +x hash.py</span></code></pre>
 
 Next, add the file to the staging area.  This tells @git@ that the file should be included on the next commit.
 
-notextile. <pre><code>~/$USER/crunch_scripts$ <span class="userinput">git add hash.py</span></code></pre>
+notextile. <pre><code>~/tutorial/crunch_scripts$ <span class="userinput">git add hash.py</span></code></pre>
 
 Next, commit your changes.  All staged changes are recorded into the local git repository:
 
 <notextile>
-<pre><code>~/$USER/crunch_scripts$ <span class="userinput">git commit -m"my first script"</span>
+<pre><code>~/tutorial/crunch_scripts$ <span class="userinput">git commit -m "my first script"</span>
 [master (root-commit) 27fd88b] my first script
  1 file changed, 45 insertions(+)
  create mode 100755 crunch_scripts/hash.py</code></pre>
@@ -79,12 +55,12 @@ Next, commit your changes.  All staged changes are recorded into the local git r
 Finally, upload your changes to the Arvados server:
 
 <notextile>
-<pre><code>~/$USER/crunch_scripts$ <span class="userinput">git push origin master</span>
+<pre><code>~/tutorial/crunch_scripts$ <span class="userinput">git push origin master</span>
 Counting objects: 4, done.
 Compressing objects: 100% (2/2), done.
 Writing objects: 100% (4/4), 682 bytes, done.
 Total 4 (delta 0), reused 0 (delta 0)
-To git@git.qr1hi.arvadosapi.com:$USER.git
+To git@git.qr1hi.arvadosapi.com:$USER/tutorial.git
  * [new branch]      master -> master</code></pre>
 </notextile>
 
@@ -100,13 +76,13 @@ In the editor, enter the following template:
 
 <notextile> {% code 'tutorial_submit_job' as javascript %} </notextile>
 
-* @"repository"@ is the name of a git repository to search for the script version.  You can access a list of available git repositories on the Arvados Workbench under "Code repositories":https://{{site.arvados_workbench_host}}/repositories.
+* @"repository"@ is the name of a git repository to search for the script version.  You can access a list of available git repositories on the Arvados Workbench in the *Repositories* page using the <span class="fa fa-lg fa-user"></span> <span class="caret"></span> top navigation menu icon.
 * @"script_version"@ specifies the version of the script that you wish to run.  This can be in the form of an explicit Git revision hash, a tag, or a branch (in which case it will use the HEAD of the specified branch).  Arvados logs the script version that was used in the run, enabling you to go back and re-run any past job with the guarantee that the exact same code will be used as was used in the previous run.
 * @"script"@ specifies the filename of the script to run.  Crunch expects to find this in the @crunch_scripts/@ subdirectory of the Git repository.
 * @"runtime_constraints"@ describes the runtime environment required to run the job.  These are described in the "job record schema":{{site.baseurl}}/api/schema/Job.html
 
 h2. Running your pipeline
 
-Your new pipeline template should appear at the top of the Workbench "pipeline&nbsp;templates":https://{{ site.arvados_workbench_host }}/pipeline_templates page.  You can run your pipeline "using Workbench":tutorial-pipeline-workbench.html or the "command line.":{{site.baseurl}}/user/topics/running-pipeline-command-line.html
+Your new pipeline template should appear at the top of the Workbench "pipeline&nbsp;templates":{{site.arvados_workbench_host}}/pipeline_templates page.  You can run your pipeline "using Workbench":tutorial-pipeline-workbench.html or the "command line.":{{site.baseurl}}/user/topics/running-pipeline-command-line.html
 
 For more information and examples for writing pipelines, see the "pipeline template reference":{{site.baseurl}}/api/schema/PipelineTemplate.html
index f521b8c90135dfdcf06923c2b4d6f6ad0c56d14f..9c03e1b825f3408050def680bc812617b1be9d84 100644 (file)
@@ -6,16 +6,13 @@ containers, so that they can be run easily in different environments
 (a dedicated server, a developer's laptop, a virtual machine,
 etc).
 
-This is a work in progress; instructions will almost certainly be
-incomplete and possibly out of date.
-
 Prerequisites
 -------------
 
 * Docker
 
-  Docker is a Linux container management system based on LXC. It is a
-  very young system but is being developed rapidly.
+  Docker is a Linux container management system. It is a very young system but
+  is being developed rapidly.
   [Installation packages](http://www.docker.io/gettingstarted/)
   are available for several platforms.
   
@@ -52,10 +49,13 @@ Building
 Type `./build.sh` to configure and build the following Docker images:
 
    * arvados/api       - the Arvados API server
+   * arvados/compute   - Arvados compute node image
    * arvados/doc       - Arvados documentation
-   * arvados/warehouse - Keep, the Arvados content-addressable filesystem
-   * arvados/workbench - the Arvados console
+   * arvados/keep      - Keep, the Arvados content-addressable filesystem
+   * arvados/keepproxy - Keep proxy
+   * arvados/shell     - Arvados shell node image
    * arvados/sso       - the Arvados single-signon authentication server
+   * arvados/workbench - the Arvados console
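+
+For example (assuming Docker is installed and the current user is allowed to run it):
+
+    $ ./build.sh
+    $ docker images | grep ^arvados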
 
 `build.sh` will generate reasonable defaults for all configuration
 settings.  If you want more control over the way Arvados is
diff --git a/docker/api/.gitolite.rc b/docker/api/.gitolite.rc
new file mode 100644 (file)
index 0000000..855e103
--- /dev/null
@@ -0,0 +1,191 @@
+# configuration variables for gitolite
+
+# This file is in perl syntax.  But you do NOT need to know perl to edit it --
+# just mind the commas, use single quotes unless you know what you're doing,
+# and make sure the brackets and braces stay matched up!
+
+# (Tip: perl allows a comma after the last item in a list also!)
+
+# HELP for commands can be had by running the command with "-h".
+
+# HELP for all the other FEATURES can be found in the documentation (look for
+# "list of non-core programs shipped with gitolite" in the master index) or
+# directly in the corresponding source file.
+
+my $repo_aliases;
+my $aliases_src = "$ENV{HOME}/.gitolite/arvadosaliases.pl";
+if ($ENV{HOME} && (-e $aliases_src)) {
+    $repo_aliases = do $aliases_src;
+}
+$repo_aliases ||= {};
+
+%RC = (
+
+    # ------------------------------------------------------------------
+
+    # default umask gives you perms of '0700'; see the rc file docs for
+    # how/why you might change this
+    UMASK                           =>  0022,
+
+    # look for "git-config" in the documentation
+    GIT_CONFIG_KEYS                 =>  '',
+
+    # comment out if you don't need all the extra detail in the logfile
+    LOG_EXTRA                       =>  1,
+
+    # roles.  add more roles (like MANAGER, TESTER, ...) here.
+    #   WARNING: if you make changes to this hash, you MUST run 'gitolite
+    #   compile' afterward, and possibly also 'gitolite trigger POST_COMPILE'
+    ROLES => {
+        READERS                     =>  1,
+        WRITERS                     =>  1,
+    },
+
+    REPO_ALIASES => $repo_aliases,
+
+    # ------------------------------------------------------------------
+
+    # rc variables used by various features
+
+    # the 'info' command prints this as additional info, if it is set
+        # SITE_INFO                 =>  'Please see http://blahblah/gitolite for more help',
+
+    # the 'desc' command uses this
+        # WRITER_CAN_UPDATE_DESC    =>  1,
+    # the 'readme' command uses this
+        # WRITER_CAN_UPDATE_README  =>  1,
+
+    # the CpuTime feature uses these
+        # display user, system, and elapsed times to user after each git operation
+        # DISPLAY_CPU_TIME          =>  1,
+        # display a warning if total CPU times (u, s, cu, cs) crosses this limit
+        # CPU_TIME_WARN_LIMIT       =>  0.1,
+
+    # the Mirroring feature needs this
+        # HOSTNAME                  =>  "foo",
+
+    # if you enabled 'Shell', you need this
+        # SHELL_USERS_LIST          =>  "$ENV{HOME}/.gitolite.shell-users",
+
+    # ------------------------------------------------------------------
+
+    # suggested locations for site-local gitolite code (see cust.html)
+
+        # this one is managed directly on the server
+        # LOCAL_CODE                =>  "$ENV{HOME}/local",
+
+        # or you can use this, which lets you put everything in a subdirectory
+        # called "local" in your gitolite-admin repo.  For a SECURITY WARNING
+        # on this, see http://gitolite.com/gitolite/cust.html#pushcode
+        # LOCAL_CODE                =>  "$rc{GL_ADMIN_BASE}/local",
+
+    # ------------------------------------------------------------------
+
+    # List of commands and features to enable
+
+    ENABLE => [
+
+        # COMMANDS
+
+            # These are the commands enabled by default
+            'help',
+            'desc',
+            'info',
+            'perms',
+            'writable',
+
+            # Uncomment or add new commands here.
+            # 'create',
+            # 'fork',
+            # 'mirror',
+            # 'readme',
+            # 'sskm',
+            # 'D',
+
+        # These FEATURES are enabled by default.
+
+            # essential (unless you're using smart-http mode)
+            'ssh-authkeys',
+
+            # creates git-config entries from gitolite.conf file entries like 'config foo.bar = baz'
+            'git-config',
+
+            # creates git-daemon-export-ok files; if you don't use git-daemon, comment this out
+            'daemon',
+
+            # creates projects.list file; if you don't use gitweb, comment this out
+            'gitweb',
+
+        # These FEATURES are disabled by default; uncomment to enable.  If you
+        # need to add new ones, ask on the mailing list :-)
+
+        # user-visible behaviour
+
+            # prevent wild repos auto-create on fetch/clone
+            # 'no-create-on-read',
+            # no auto-create at all (don't forget to enable the 'create' command!)
+            # 'no-auto-create',
+
+            # access a repo by another (possibly legacy) name
+            'Alias',
+
+            # give some users direct shell access
+            # 'Shell',
+
+            # set default roles from lines like 'option default.roles-1 = ...', etc.
+            # 'set-default-roles',
+
+            # show more detailed messages on deny
+            # 'expand-deny-messages',
+
+        # system admin stuff
+
+            # enable mirroring (don't forget to set the HOSTNAME too!)
+            # 'Mirroring',
+
+            # allow people to submit pub files with more than one key in them
+            # 'ssh-authkeys-split',
+
+            # selective read control hack
+            # 'partial-copy',
+
+            # manage local, gitolite-controlled, copies of read-only upstream repos
+            # 'upstream',
+
+            # updates 'description' file instead of 'gitweb.description' config item
+            # 'cgit',
+
+            # allow repo-specific hooks to be added
+            # 'repo-specific-hooks',
+
+        # performance, logging, monitoring...
+
+            # be nice
+            # 'renice 10',
+
+            # log CPU times (user, system, cumulative user, cumulative system)
+            # 'CpuTime',
+
+        # syntactic sugar for gitolite.conf and included files
+
+            # allow backslash-escaped continuation lines in gitolite.conf
+            # 'continuation-lines',
+
+            # create implicit user groups from directory names in keydir/
+            # 'keysubdirs-as-groups',
+
+            # allow simple line-oriented macros
+            # 'macros',
+
+    ],
+
+);
+
+# ------------------------------------------------------------------------------
+# per perl rules, this should be the last line in such a file:
+1;
+
+# Local variables:
+# mode: perl
+# End:
+# vim: set syn=perl:
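The WARNING in the ROLES section above is easy to overlook: after editing that hash on the server, the generated gitolite config has to be rebuilt. A minimal sketch of the follow-up commands, assuming gitolite is installed under the git user's ~/bin as the setup script later in this change does:

    # run on the server hosting gitolite, as root
    su - git -c "PATH=/home/git/bin:$PATH gitolite compile"
    # re-run triggers in case hooks or mirroring depend on the rc change
    su - git -c "PATH=/home/git/bin:$PATH gitolite trigger POST_COMPILE"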
index abd2114302ac5f3595a3a0e0a031383342f601ff..6a3428c4a93ebd1167d70f0c9d60b7290bcdbed4 100644 (file)
@@ -1,12 +1,12 @@
 # Arvados API server Docker container.
 
 FROM arvados/passenger
-MAINTAINER Tim Pierce <twp@curoverse.com>
+MAINTAINER Ward Vandewege <ward@curoverse.com>
 
 # Install postgres and apache.
-RUN apt-get update -qq
-RUN apt-get install -qqy \
-    procps postgresql postgresql-server-dev-9.1 apache2 slurm-llnl munge \
+RUN apt-get update -q
+RUN apt-get install -qy \
+    procps postgresql postgresql-server-dev-9.1 slurm-llnl munge \
     supervisor sudo libwww-perl libio-socket-ssl-perl libcrypt-ssleay-perl \
     libjson-perl cron openssh-server
 
@@ -25,7 +25,6 @@ ADD generated/database.yml /usr/src/arvados/services/api/config/database.yml
 ADD generated/omniauth.rb /usr/src/arvados/services/api/config/initializers/omniauth.rb
 RUN /bin/cp /usr/src/arvados/services/api/config/environments/production.rb.example /usr/src/arvados/services/api/config/environments/production.rb
 ADD generated/application.yml /usr/src/arvados/services/api/config/application.yml
-ADD generated/apache2_vhost /etc/apache2/sites-available/arvados
 
 # Configure Rails databases.
 ENV RAILS_ENV production
@@ -45,13 +44,6 @@ RUN /usr/local/rvm/bin/rvm-exec default bundle install --gemfile=/usr/src/arvado
     mkdir -p tmp && \
     chown www-data:www-data tmp -R
 
-# Configure Apache and Passenger.
-RUN a2dissite default && \
-    a2ensite arvados && \
-    a2enmod rewrite && \
-    a2enmod ssl && \
-    /bin/mkdir /var/run/apache2
-
 # Install a token for root
 RUN mkdir -p /root/.config/arvados; echo "ARVADOS_API_HOST=api" >> /root/.config/arvados/settings.conf && echo "ARVADOS_API_HOST_INSECURE=yes" >> /root/.config/arvados/settings.conf && echo "ARVADOS_API_TOKEN=$(cat /tmp/superuser_token)" >> /root/.config/arvados/settings.conf && chmod 600 /root/.config/arvados/settings.conf
 
@@ -61,20 +53,23 @@ RUN mkdir -p /var/lib/arvados
 RUN addgroup --gid 4005 crunch && mkdir /home/crunch && useradd --uid 4005 --gid 4005 crunch && chown crunch:crunch /home/crunch
 
 # Create keep and compute node objects
-ADD keep_server_0.json /root/
-ADD keep_server_1.json /root/
+ADD generated/keep_server_0.json /root/
+ADD generated/keep_server_1.json /root/
+ADD keep_proxy.json /root/
 
 # Set up update-gitolite.rb
 RUN mkdir /usr/local/arvados/config -p
-ADD generated/arvados-clients.yml /usr/local/arvados/config/
-ADD update-gitolite.rb /usr/local/arvados/
+ADD generated/arvados-clients.yml /usr/src/arvados/services/api/config/
+ADD .gitolite.rc /usr/local/arvados/config/
+RUN ln /usr/src/arvados/services/api/script/arvados-git-sync.rb /usr/local/bin/
 
 # Supervisor.
 ADD supervisor.conf /etc/supervisor/conf.d/arvados.conf
 ADD generated/setup.sh /usr/local/bin/setup.sh
 ADD generated/setup-gitolite.sh /usr/local/bin/setup-gitolite.sh
 ADD crunch-dispatch-run.sh /usr/local/bin/crunch-dispatch-run.sh
-ADD apache2_foreground.sh /etc/apache2/foreground.sh
+ADD munge.sh /usr/local/bin/munge.sh
+ADD passenger.sh /usr/local/bin/passenger.sh
 
 # Start the supervisor.
 CMD ["/usr/bin/supervisord", "-n"]
index fdbb2f929332089b3d36c6b817a8572b4d98f2bb..344e36d1759275c9558a93009dcacaf9e61817b4 100644 (file)
@@ -42,8 +42,6 @@
     SSLEngine on
     # SSLCertificateChainFile /etc/ssl/certs/startcom.sub.class1.server.ca.pem
     # SSLCACertificateFile    /etc/ssl/certs/startcom.ca.pem
-    # SSLCertificateFile      /etc/ssl/certs/qr1hi.arvadosapi.com.crt.pem
-    # SSLCertificateKeyFile   /etc/ssl/private/qr1hi.arvadosapi.com.key.pem
     SSLCertificateFile    /etc/ssl/certs/ssl-cert-snakeoil.pem
     SSLCertificateKeyFile /etc/ssl/private/ssl-cert-snakeoil.key
     SetEnvIf User-Agent ".*MSIE.*" nokeepalive ssl-unclean-shutdown
index 7f419853cd68dbd70f4f500683d4b5c79388e694..97eb66f5db2fc79bcd1236a6895270a2eec22ea8 100644 (file)
@@ -19,6 +19,13 @@ development:
   blob_signing_key: ~
 
 production:
+  host: api.@@ARVADOS_DOMAIN@@
+
+  git_repo_ssh_base: "git@api.@@ARVADOS_DOMAIN@@:"
+
+  # Docker setup doesn't include arv-git-httpd yet.
+  git_repo_https_base: false
+
   # At minimum, you need a nice long randomly generated secret_token here.
   # Use a long string of alphanumeric characters (at least 36).
   secret_token: @@API_SECRET@@
@@ -30,26 +37,11 @@ production:
 
   uuid_prefix: @@API_HOSTNAME@@
 
-  # The e-mail address of the user you would like to become marked as an admin
-  # user on their first login.
-  # In the default configuration, authentication happens through the Arvados SSO
-  # server, which uses openid against Google's servers, so in that case this
-  # should be an address associated with a Google account.
-  auto_admin_user: @@API_AUTO_ADMIN_USER@@
-
   # compute_node_domain: example.org
   # compute_node_nameservers:
   #   - 127.0.0.1
   #   - 192.168.1.1
   #
-  # The version below is suitable for AWS.
-  # Uncomment and change <%# to <%= to use it.
-  # compute_node_nameservers: <%#
-  #   require 'net/http'
-  #   ['local', 'public'].collect do |iface|
-  #     Net::HTTP.get(URI("http://169.254.169.254/latest/meta-data/#{iface}-ipv4")).match(/^[\d\.]+$/)[0]
-  #   end << '172.16.0.23'
-  # %>
   permit_create_collection_with_unsigned_manifest: true
   git_repositories_dir: /home/git/repositories
   crunch_job_wrapper: :slurm_immediate
@@ -62,6 +54,10 @@ production:
 
   auto_admin_first_user: true
 
+  auto_setup_new_users_with_repository: true
+
+  auto_setup_new_users_with_vm_uuid: @@API_HOSTNAME@@-2x53u-csbtkecoa669vkz
+
 test:
   uuid_prefix: zzzzz
   secret_token: <%= rand(2**512).to_s(36) %>
@@ -69,4 +65,3 @@ test:
 common:
   #git_repositories_dir: /var/cache/git
   #git_internal_dir: /var/cache/arvados/internal.git
-
index 59ff352a1889a5edc3fcd7fe27343e889c772231..6741328501cc9f089dae57db0e1810f2cc393a5f 100644 (file)
@@ -1,5 +1,5 @@
 production:
-  gitolite_url: 'git@api:gitolite-admin.git'
+  gitolite_url: 'git@api.@@ARVADOS_DOMAIN@@:gitolite-admin.git'
   gitolite_tmp: 'gitolite-tmp'
   arvados_api_host: 'api'
   arvados_api_token: '@@API_SUPERUSER_SECRET@@'
diff --git a/docker/api/keep_proxy.json b/docker/api/keep_proxy.json
new file mode 100644 (file)
index 0000000..117e590
--- /dev/null
@@ -0,0 +1,6 @@
+{
+  "service_host": "localhost",
+  "service_port": 9902,
+  "service_ssl_flag": "false",
+  "service_type": "proxy"
+}
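This new service record is what tells the API server that a keep proxy exists. It is registered by the api setup script further down in this change; the relevant call, shown here for context:

    # as run from setup.sh, with ARVADOS_API_HOST/ARVADOS_API_TOKEN exported
    arv keep_service create --keep-service "$(cat /root/keep_proxy.json)"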
similarity index 59%
rename from docker/api/keep_server_0.json
rename to docker/api/keep_server_0.json.in
index ce02f5086559963d4641a9890a4d465a8221e65a..d63c5900ade6865eeb3f983f74dfca52ae2a13cf 100644 (file)
@@ -1,5 +1,5 @@
 {
-  "service_host": "keep_server_0.keep.dev.arvados",
+  "service_host": "keep_server_0.keep.@@ARVADOS_DOMAIN@@",
   "service_port": 25107,
   "service_ssl_flag": "false",
   "service_type": "disk"
similarity index 59%
rename from docker/api/keep_server_1.json
rename to docker/api/keep_server_1.json.in
index dbbdd1c31f52fef7a3cebb3b34ce6e4dbfb0c29c..53d5c642c2ad5c624fd6c8f106cfcdea95adf2f2 100644 (file)
@@ -1,7 +1,6 @@
 {
-  "service_host": "keep_server_1.keep.dev.arvados",
+  "service_host": "keep_server_1.keep.@@ARVADOS_DOMAIN@@",
   "service_port": 25107,
   "service_ssl_flag": "false",
   "service_type": "disk"
 }
-
diff --git a/docker/api/munge.sh b/docker/api/munge.sh
new file mode 100755 (executable)
index 0000000..ef10d01
--- /dev/null
@@ -0,0 +1,3 @@
+#!/bin/sh
+rm -rf /var/run/munge
+exec /etc/init.d/munge start
index 198668e0fc39d2a21f8b6ae83f2508ac59da8b07..8daa30043b19842037cf0b6f02efd2b273365b01 100644 (file)
@@ -7,7 +7,7 @@ APP_SECRET = '@@SSO_CLIENT_SECRET@@'
 if '@@OMNIAUTH_URL@@' != ''
   CUSTOM_PROVIDER_URL = '@@OMNIAUTH_URL@@'
 else
-  CUSTOM_PROVIDER_URL = 'https://' + ENV['SSO_PORT_443_TCP_ADDR'].to_s
+  CUSTOM_PROVIDER_URL = 'https://@@SSO_HOSTNAME@@.@@ARVADOS_DOMAIN@@'
 end
 
 # This is a development sandbox, we use self-signed certificates
diff --git a/docker/api/passenger.sh b/docker/api/passenger.sh
new file mode 100755 (executable)
index 0000000..a62d9d5
--- /dev/null
@@ -0,0 +1,7 @@
+#!/bin/sh
+
+cd /usr/src/arvados/services/api
+export ARVADOS_WEBSOCKETS=1
+export RAILS_ENV=production
+/usr/local/rvm/bin/rvm-exec default bundle exec rake db:migrate
+exec /usr/local/rvm/bin/rvm-exec default bundle exec passenger start -p443 --ssl --ssl-certificate=/etc/ssl/certs/ssl-cert-snakeoil.pem --ssl-certificate-key=/etc/ssl/private/ssl-cert-snakeoil.key
index 92014f990678c6a0cf9edbb5bb42e027f77508e7..023ca5de2e97858d4c0db0fbc15abc0155c46e9b 100755 (executable)
@@ -13,10 +13,7 @@ su - git -c "mkdir -p ~/bin"
 su - git -c "git clone git://github.com/sitaramc/gitolite"
 su - git -c "gitolite/install -ln ~/bin"
 su - git -c "PATH=/home/git/bin:$PATH gitolite setup -pk ~git/root-authorized_keys.pub"
-
-# Make sure the repositories are created in such a way that they are readable
-# by the api server
-sed -i 's/0077/0022/g' /home/git/.gitolite.rc
+install -o git -g git -m 600 /usr/local/arvados/config/.gitolite.rc /home/git/
 
 # And make sure that the existing repos are equally readable, or the API server commit model will freak out...
 chmod 755 /home/git/repositories
@@ -25,9 +22,9 @@ chmod +rx /home/git/repositories/*git -R
 # Now set up the gitolite repo(s) we use
 mkdir -p /usr/local/arvados/gitolite-tmp/
 # Make ssh store the host key
-ssh -o "StrictHostKeyChecking no" git@api info
+ssh -o "StrictHostKeyChecking no" git@api.@@ARVADOS_DOMAIN@@ info
 # Now check out the tree
-git clone git@api:gitolite-admin.git /usr/local/arvados/gitolite-tmp/gitolite-admin/
+git clone git@api.@@ARVADOS_DOMAIN@@:gitolite-admin.git /usr/local/arvados/gitolite-tmp/gitolite-admin/
 cd /usr/local/arvados/gitolite-tmp/gitolite-admin
 mkdir keydir/arvados
 mkdir conf/admin
@@ -63,7 +60,7 @@ git push
 su - git -c "git clone --bare git://github.com/curoverse/arvados.git /home/git/repositories/arvados.git"
 
 echo "ARVADOS_API_HOST_INSECURE=yes" > /etc/cron.d/gitolite-update
-echo "*/2 * * * * root /bin/bash -c 'source /etc/profile.d/rvm.sh && /usr/local/arvados/update-gitolite.rb production'" >> /etc/cron.d/gitolite-update
+echo "*/2 * * * * root /bin/bash -c 'source /etc/profile.d/rvm.sh && /usr/src/arvados/services/api/script/arvados-git-sync.rb production'" >> /etc/cron.d/gitolite-update
 
 # Create/update the repos now
 . /etc/profile.d/rvm.sh
@@ -74,4 +71,3 @@ export ARVADOS_API_TOKEN=@@API_SUPERUSER_SECRET@@
 
 echo "PATH=/usr/bin:/bin:/sbin" > /etc/cron.d/arvados-repo-update
 echo "*/5 * * * * git cd ~git/repositories/arvados.git; git fetch https://github.com/curoverse/arvados.git master:master" >> /etc/cron.d/arvados-repo-update
-
index 7af6afb237e364ffafeceb826c039dfa802b5fd4..2c7da923d8b7dfd6f4a33f9c5ed8f175d58e463d 100755 (executable)
@@ -2,15 +2,25 @@
 
 set -x
 
+if test -f /root/finished_arvados_setup ; then
+   exit
+fi
+
 . /etc/profile.d/rvm.sh
 
 export ARVADOS_API_HOST=api
 export ARVADOS_API_HOST_INSECURE=yes
 export ARVADOS_API_TOKEN=@@API_SUPERUSER_SECRET@@
+export HOME=/root
+
+# Wait for API server to come up.
+while ! arv user current ; do sleep 1 ; done
 
 # Arvados repository object
-all_users_group_uuid="$prefix-j7d0g-fffffffffffffff"
-repo_uuid=`arv --format=uuid repository create --repository '{"name":"arvados","fetch_url":"git@api:arvados.git","push_url":"git@api:arvados.git"}'`
+all_users_group_uuid="@@API_HOSTNAME@@-j7d0g-fffffffffffffff"
+
+arv user update --uuid @@API_HOSTNAME@@-tpzed-000000000000000 --user '{"username":"root"}'
+repo_uuid=`arv --format=uuid repository create --repository '{"owner_uuid":"@@API_HOSTNAME@@-tpzed-000000000000000", "name":"arvados"}'`
 echo "Arvados repository uuid is $repo_uuid"
 
 read -rd $'\000' newlink <<EOF; arv link create --link "$newlink"
@@ -35,19 +45,38 @@ if [[ "$?" != "0" ]]; then
   arv keep_service create --keep-service "$(cat /root/keep_server_1.json)"
 fi
 
+grep -q keep_proxy /tmp/keep_service.list
+if [[ "$?" != "0" ]]; then
+  arv keep_service create --keep-service "$(cat /root/keep_proxy.json)"
+fi
+
 # User repository object
-user_uuid=`arv --format=uuid user current`
-repo_uuid=`arv --format=uuid repository create --repository '{"name":"@@ARVADOS_USER_NAME@@","fetch_url":"git@api:@@ARVADOS_USER_NAME@@.git","push_url":"git@api:@@ARVADOS_USER_NAME@@.git"}'`
-echo "User repository uuid is $repo_uuid"
+# user_uuid=`arv --format=uuid user current`
+# repo_uuid=`arv --format=uuid repository create --repository '{"name":"@@ARVADOS_USER_NAME@@","fetch_url":"git@api.dev.arvados:@@ARVADOS_USER_NAME@@.git","push_url":"git@api.dev.arvados:@@ARVADOS_USER_NAME@@.git"}'`
 
-read -rd $'\000' newlink <<EOF; arv link create --link "$newlink"
-{
- "tail_uuid":"$user_uuid",
- "head_uuid":"$repo_uuid",
- "link_class":"permission",
- "name":"can_write"
-}
-EOF
+# echo "User repository uuid is $repo_uuid"
+
+# read -rd $'\000' newlink <<EOF; arv link create --link "$newlink"
+# {
+#  "tail_uuid":"$user_uuid",
+#  "head_uuid":"$repo_uuid",
+#  "link_class":"permission",
+#  "name":"can_write"
+# }
+# EOF
+
+# Shell machine object
+shell_uuid=`arv --format=uuid virtual_machine create --virtual-machine '{"hostname":"shell"}'`
+arv virtual_machine create --virtual-machine '{"hostname":"shell.dev", "uuid": "@@API_HOSTNAME@@-2x53u-csbtkecoa669vkz"}'
+
+# read -rd $'\000' newlink <<EOF; arv link create --link "$newlink"
+# {
+#  "tail_uuid":"$user_uuid",
+#  "head_uuid":"$shell_uuid",
+#  "link_class":"permission",
+#  "name":"can_login",
+#  "properties": {"username": "@@ARVADOS_USER_NAME@@"}
+# }
+# EOF
 
-# Shell machine object
-arv virtual_machine create --virtual-machine '{"hostname":"shell"}'
+touch /root/finished_arvados_setup
index b01dc1c11051a723e35f8f56dfc8ce8c0a58d7f3..b24e552efd306a4c92b27e2ba15d1fe32930464c 100644 (file)
@@ -8,14 +8,13 @@ user=postgres
 command=/usr/lib/postgresql/9.1/bin/postgres -D /var/lib/postgresql/9.1/main -c config_file=/etc/postgresql/9.1/main/postgresql.conf
 autorestart=true
 
-[program:apache2]
-command=/etc/apache2/foreground.sh
-stopsignal=6
+[program:passenger]
+command=/usr/local/bin/passenger.sh
 autorestart=true
 
 [program:munge]
 user=root
-command=/etc/init.d/munge start
+command=/usr/local/bin/munge.sh
 startsecs=0
 
 [program:slurm]
diff --git a/docker/api/update-gitolite.rb b/docker/api/update-gitolite.rb
deleted file mode 100755 (executable)
index 2c46a0d..0000000
+++ /dev/null
@@ -1,168 +0,0 @@
-#!/usr/bin/env ruby
-
-require 'rubygems'
-require 'pp'
-require 'arvados'
-require 'active_support/all'
-require 'yaml'
-
-# This script does the actual gitolite config management on disk.
-#
-# Ward Vandewege <ward@curoverse.com>
-
-# Default is development
-production = ARGV[0] == "production"
-
-ENV["RAILS_ENV"] = "development"
-ENV["RAILS_ENV"] = "production" if production
-
-DEBUG = 1
-
-# load and merge in the environment-specific application config info
-# if present, overriding base config parameters as specified
-path = File.dirname(__FILE__) + '/config/arvados-clients.yml'
-if File.exists?(path) then
-  cp_config = YAML.load_file(path)[ENV['RAILS_ENV']]
-else
-  puts "Please create a\n " + File.dirname(__FILE__) + "/config/arvados-clients.yml\n file"
-  exit 1
-end
-
-gitolite_url = cp_config['gitolite_url']
-gitolite_tmp = cp_config['gitolite_tmp']
-
-gitolite_admin = File.join(File.expand_path(File.dirname(__FILE__)) + '/' + gitolite_tmp + '/gitolite-admin')
-
-ENV['ARVADOS_API_HOST'] = cp_config['arvados_api_host']
-ENV['ARVADOS_API_TOKEN'] = cp_config['arvados_api_token']
-if cp_config['arvados_api_host_insecure']
-  ENV['ARVADOS_API_HOST_INSECURE'] = 'true'
-else
-  ENV.delete('ARVADOS_API_HOST_INSECURE')
-end
-
-keys = ''
-
-seen = Hash.new
-
-def ensure_repo(name,permissions,user_keys,gitolite_admin)
-  tmp = ''
-  # Just in case...
-  name.gsub!(/[^a-z0-9]/i,'')
-
-  keys = Hash.new()
-
-  user_keys.each do |uuid,p|
-    p.each do |k|
-      next if k[:public_key].nil?
-      keys[uuid] = Array.new() if not keys.key?(uuid)
-
-      key = k[:public_key]
-      # Handle putty-style ssh public keys
-      key.sub!(/^(Comment: "r[^\n]*\n)(.*)$/m,'ssh-rsa \2 \1')
-      key.sub!(/^(Comment: "d[^\n]*\n)(.*)$/m,'ssh-dss \2 \1')
-      key.gsub!(/\n/,'')
-      key.strip
-
-      keys[uuid].push(key)
-    end
-  end
-
-  cf = gitolite_admin + '/conf/auto/' + name + '.conf'
-
-  conf = "\nrepo #{name}\n"
-
-  commit = false
-
-  seen = {}
-  permissions.sort.each do |uuid,v|
-    conf += "\t#{v[:gitolite_permissions]}\t= #{uuid.to_s}\n"
-
-    count = 0
-    keys.include?(uuid) and keys[uuid].each do |v|
-      kf = gitolite_admin + '/keydir/arvados/' + uuid.to_s + "@#{count}.pub"
-      seen[kf] = true
-      if !File.exists?(kf) or IO::read(kf) != v then
-        commit = true
-        f = File.new(kf + ".tmp",'w')
-        f.write(v)
-        f.close()
-        # File.rename will overwrite the destination file if it exists
-        File.rename(kf + ".tmp",kf);
-      end
-      count += 1
-    end
-  end
-
-  if !File.exists?(cf) or IO::read(cf) != conf then
-    commit = true
-    f = File.new(cf + ".tmp",'w')
-    f.write(conf)
-    f.close()
-    # this is about as atomic as we can make the replacement of the file...
-    File.unlink(cf) if File.exists?(cf)
-    File.rename(cf + ".tmp",cf);
-  end
-
-  return commit,seen
-end
-
-begin
-
-  pwd = Dir.pwd
-  # Get our local gitolite-admin repo up to snuff
-  if not File.exists?(File.dirname(__FILE__) + '/' + gitolite_tmp) then
-    Dir.mkdir(File.join(File.dirname(__FILE__) + '/' + gitolite_tmp), 0700)
-  end
-  if not File.exists?(gitolite_admin) then
-    Dir.chdir(File.join(File.dirname(__FILE__) + '/' + gitolite_tmp))
-    `git clone #{gitolite_url}`
-  else
-    Dir.chdir(gitolite_admin)
-    `git pull`
-  end
-  Dir.chdir(pwd)
-
-  arv = Arvados.new( { :suppress_ssl_warnings => false } )
-
-  permissions = arv.repository.get_all_permissions
-
-  repos = permissions[:repositories]
-  user_keys = permissions[:user_keys]
-
-  @commit = false
-
-  @seen = {}
-
-  repos.each do |r|
-    next if r[:name].nil?
-    (@c,@s) = ensure_repo(r[:name],r[:user_permissions],user_keys,gitolite_admin)
-    @seen.merge!(@s)
-    @commit = true if @c
-  end
-
-  # Clean up public key files that should not be present
-  Dir.glob(gitolite_admin + '/keydir/arvados/*.pub') do |key_file|
-    next if key_file =~ /arvados_git_user.pub$/
-    next if @seen.has_key?(key_file)
-    puts "Extra file #{key_file}"
-    @commit = true
-    Dir.chdir(gitolite_admin)
-    key_file.gsub!(/^#{gitolite_admin}\//,'')
-    `git rm #{key_file}`
-  end
-
-  if @commit then
-    message = "#{Time.now().to_s}: update from API"
-    Dir.chdir(gitolite_admin)
-    `git add --all`
-    `git commit -m '#{message}'`
-    `git push`
-  end
-
-rescue Exception => bang
-  puts "Error: " + bang.to_s
-  puts bang.backtrace.join("\n")
-  exit 1
-end
-
index 142ba27e3141cb6bb5a47667b99e7167dcba965f..43a384ea497e2cbde1a85b29b1c887edeeb814d5 100755 (executable)
@@ -6,13 +6,24 @@ if [[ "$DOCKER" == "" ]]; then
     DOCKER=`which docker`
 fi
 
+CURL=`which curl`
+
 COMPUTE_COUNTER=0
 
+ARVADOS_DOMAIN=dev.arvados
+
 function usage {
     echo >&2
-    echo >&2 "usage: $0 (start|stop|restart|test) [options]"
+    echo >&2 "usage: $0 (start|stop|restart|reset|test) [options]"
+    echo >&2
+    echo >&2 "start    run new or restart stopped arvados containers"
+    echo >&2 "stop     stop arvados containers"
+    echo >&2 "restart  stop and then start arvados containers"
+    echo >&2 "reset    stop and delete containers WARNING: this will delete the data inside Arvados!"
+    echo >&2 "test     run tests"
     echo >&2
-    echo >&2 "$0 start/stop/restart options:"
+    echo >&2 "$0 options:"
+    echo >&2 "  -b[bridge], --bridge[=bridge] Docker bridge (default bridge docker0)"
     echo >&2 "  -d[port], --doc[=port]        Documentation server (default port 9898)"
     echo >&2 "  -w[port], --workbench[=port]  Workbench server (default port 9899)"
     echo >&2 "  -s[port], --sso[=port]        SSO server (default port 9901)"
@@ -21,7 +32,9 @@ function usage {
     echo >&2 "  -v, --vm                      Shell server"
     echo >&2 "  -n, --nameserver              Nameserver"
     echo >&2 "  -k, --keep                    Keep servers"
+    echo >&2 "  -p, --keepproxy               Keepproxy server"
     echo >&2 "  -h, --help                    Display this help and exit"
+    echo >&2 "      --domain=dns.domain       DNS domain used by containers (default dev.arvados)"
     echo >&2
     echo >&2 "  If no options are given, the action is applied to all servers."
     echo >&2
@@ -34,7 +47,16 @@ function ip_address {
     echo `$DOCKER inspect $container  |grep IPAddress |cut -f4 -d\"`
 }
 
+function bridge_ip_address {
+    local bridge_name=$1
+    # FIXME: add a more robust check here; the ip command could be missing,
+    # there could be multiple docker bridges, etc.
+    echo $(ip --oneline --family inet addr show dev "$bridge_name" | awk '{ print $4 }' | cut -d/ -f1 )
+}
+
 function start_container {
+    bridge_ip=$(bridge_ip_address "$bridge")
+
     local args="-d -i -t"
     if [[ "$1" != '' ]]; then
       local port="$1"
@@ -43,19 +65,19 @@ function start_container {
     if [[ "$2" != '' ]]; then
       local name="$2"
       if [[ "$name" == "api_server" ]]; then
-        args="$args --dns=172.17.42.1 --dns-search=compute.dev.arvados --hostname api -P --name $name"
+        args="$args --dns=$bridge_ip --dns-search=compute.$ARVADOS_DOMAIN --hostname api -P --name $name"
       elif [[ "$name" == "compute" ]]; then
         name=$name$COMPUTE_COUNTER
         # We need --privileged because we run docker-inside-docker on the compute nodes
-        args="$args --dns=172.17.42.1 --dns-search=compute.dev.arvados --hostname compute$COMPUTE_COUNTER -P --privileged --name $name"
+        args="$args --dns=$bridge_ip --dns-search=compute.$ARVADOS_DOMAIN --hostname compute$COMPUTE_COUNTER -P --privileged --name $name"
         let COMPUTE_COUNTER=$(($COMPUTE_COUNTER + 1))
       else
-        args="$args --dns=172.17.42.1 --dns-search=dev.arvados --hostname ${name#_server} --name $name"
+        args="$args --dns=$bridge_ip --dns-search=$ARVADOS_DOMAIN --hostname ${name#_server} --name $name"
       fi
     fi
     if [[ "$3" != '' ]]; then
       local volume="$3"
-      args="$args -v $volume"
+      args="$args --volumes-from $volume"
     fi
     if [[ "$4" != '' ]]; then
       local link="$4"
@@ -63,19 +85,22 @@ function start_container {
     fi
     local image=$5
 
-    `$DOCKER ps |grep -P "$name[^/]" -q`
+    `$DOCKER ps |grep -E "\b$name\b" -q`
     if [[ "$?" == "0" ]]; then
       echo "You have a running container with name $name -- skipping."
       return
     fi
 
-    # Remove any existing container by this name.
-    $DOCKER rm "$name" 2>/dev/null
+    echo "Starting container: $name"
+    `$DOCKER ps --all |grep -E "\b$name\b" -q`
+    if [[ "$?" == "0" ]]; then
+        echo "  $DOCKER start $name"
+        container=`$DOCKER start $name`
+    else
+        echo "  $DOCKER run $args $image"
+        container=`$DOCKER run $args $image`
+    fi
 
-    echo "Starting container:"
-    #echo "  $DOCKER run --dns=127.0.0.1 $args $image"
-    echo "  $DOCKER run $args $image"
-    container=`$DOCKER run $args $image`
     if [ "$?" != "0" -o "$container" = "" ]; then
       echo "Unable to start container"
       exit 1
@@ -83,48 +108,15 @@ function start_container {
       echo "Started container: $container"
     fi
 
-    if [[ "$name" == "doc_server" ]]; then
-      echo
-      echo "*****************************************************************"
-      echo "You can access the Arvados documentation at http://localhost:${port%:*}"
-      echo "*****************************************************************"
-      echo
-    fi
-
-    if [[ "$name" == "workbench_server" ]]; then
-      echo
-      echo "*****************************************************************"
-      echo "You can access the Arvados workbench at http://localhost:${port%:*}"
-      echo "*****************************************************************"
-      echo
-   fi
-
-
 }
 
-declare -a keep_volumes
-
-# Initialize the global `keep_volumes' array. If any keep volumes
-# already appear to exist (mounted volumes with a top-level "keep"
-# directory), use them; create temporary volumes if necessary.
-#
+# Create a Docker data volume container named keep_data, if it does not exist yet
 function make_keep_volumes () {
-    # Mount a keep volume if we don't already have one
-    for mountpoint in $(cut -d ' ' -f 2 /proc/mounts); do
-      if [[ -d "$mountpoint/keep" && "$mountpoint" != "/" ]]; then
-        keep_volumes+=($mountpoint)
-      fi
-    done
-
-    # Create any keep volumes that do not yet exist.
-    while [ ${#keep_volumes[*]} -lt 2 ]
-    do
-        new_keep=$(mktemp -d)
-        echo >&2 "mounting 2G tmpfs keep volume in $new_keep"
-        sudo mount -t tmpfs -o size=2G tmpfs $new_keep
-        mkdir $new_keep/keep
-        keep_volumes+=($new_keep)
-    done
+    `$DOCKER ps --all |grep -E "\bkeep_data\b" -q`
+    if [[ "$?" == "0" ]]; then
+      return
+    fi
+    $DOCKER create -v /keep-data --name keep_data arvados/keep
 }
 
 function do_start {
@@ -136,10 +128,13 @@ function do_start {
     local start_vm=false
     local start_nameserver=false
     local start_keep=false
+    local start_keepproxy=false
+    local bridge="docker0"
 
     # NOTE: This requires GNU getopt (part of the util-linux package on Debian-based distros).
-    local TEMP=`getopt -o d::s::a::cw::nkvh \
-                  --long doc::,sso::,api::,compute,workbench::,nameserver,keep,vm,help \
+    local TEMP=`getopt -o d::s::b:a::cw::nkpvh \
+                  --long doc::,sso::,api::,bridge:,compute,workbench::,nameserver,keep,keepproxy,vm,help,domain:: \
                   -n "$0" -- "$@"`
 
     if [ $? != 0 ] ; then echo "Use -h for help"; exit 1 ; fi
@@ -150,6 +145,11 @@ function do_start {
     while [ $# -ge 1 ]
     do
         case $1 in
+            -b | --bridge)
+                case "$2" in
+                    *)  bridge=$2; shift 2 ;;
+                esac
+                ;;
             -d | --doc)
                 case "$2" in
                     "") start_doc=9898; shift 2 ;;
@@ -190,6 +190,15 @@ function do_start {
                 start_keep=true
                 shift
                 ;;
+            -p | --keepproxy)
+                start_keepproxy=true
+                shift
+                ;;
+            --domain)
+                case "$2" in
+                    *) ARVADOS_DOMAIN="$2"; shift 2 ;;
+                esac
+                ;;
             --)
                 shift
                 break
@@ -209,51 +218,73 @@ function do_start {
           $start_workbench == false &&
           $start_vm == false &&
           $start_nameserver == false &&
-          $start_keep == false ]]
+          $start_keep == false &&
+          $start_keepproxy == false ]]
     then
         start_doc=9898
-        #the sso server is currently not used by default so don't start it unless explicitly requested
-        #start_sso=9901
+        start_sso=9901
         start_api=9900
         start_compute=2
         start_workbench=9899
-        start_vm=true
+        #start_vm=true
         start_nameserver=true
         start_keep=true
+        start_keepproxy=true
     fi
 
-    if [[ $start_sso != false ]]
+    if [[ $start_nameserver != false ]]
     then
-        start_container "$start_sso:443" "sso_server" '' '' "arvados/sso"
-    fi
+      $DOCKER ps | grep skydns >/dev/null
+      need_skydns="$?"
 
-    if [[ $start_api != false ]]
-    then
-      if [[ $start_sso != false ]]; then
-        start_container "$start_api:443" "api_server" '' "sso_server:sso" "arvados/api"
-      else
-        start_container "$start_api:443" "api_server" '' '' "arvados/api"
+      $DOCKER ps | grep skydock >/dev/null
+      need_skydock="$?"
+
+      if [[ "$need_skydns" != 0 || "$need_skydock" != 0 ]]
+      then
+          # skydns and skydock need to both be running before everything else.
+          # If they are not running we need to shut everything down and start
+          # over, otherwise DNS will be broken and the containers won't find each other.
+          do_stop
+          need_skydns=1
+          need_skydock=1
       fi
-    fi
 
-    if [[ $start_nameserver != false ]]
-    then
       # We rely on skydock and skydns for dns discovery between the slurm controller and compute nodes,
       # so make sure they are running
       $DOCKER ps | grep skydns >/dev/null
-      if [[ "$?" != "0" ]]; then
+      if [[ $need_skydns != "0" ]]; then
+        echo "Detecting bridge '$bridge' IP for crosbymichael/skydns"
+        bridge_ip=$(bridge_ip_address "$bridge")
+
         echo "Starting crosbymichael/skydns container..."
         $DOCKER rm "skydns" 2>/dev/null
-        $DOCKER run -d -p 172.17.42.1:53:53/udp --name skydns crosbymichael/skydns -nameserver 8.8.8.8:53 -domain arvados
+        echo $DOCKER run -d -p $bridge_ip:53:53/udp --name skydns crosbymichael/skydns -nameserver 8.8.8.8:53 -domain arvados
+        $DOCKER run -d -p $bridge_ip:53:53/udp --name skydns crosbymichael/skydns -nameserver 8.8.8.8:53 -domain arvados
       fi
       $DOCKER ps | grep skydock >/dev/null
-      if [[ "$?" != "0" ]]; then
+      if [[ "$need_skydock" != "0" ]]; then
         echo "Starting crosbymichael/skydock container..."
         $DOCKER rm "skydock" 2>/dev/null
+        echo $DOCKER run -d -v /var/run/docker.sock:/docker.sock --name skydock crosbymichael/skydock -ttl 30 -environment dev -s /docker.sock -domain arvados -name skydns
         $DOCKER run -d -v /var/run/docker.sock:/docker.sock --name skydock crosbymichael/skydock -ttl 30 -environment dev -s /docker.sock -domain arvados -name skydns
       fi
     fi
 
+    if [[ $start_sso != false ]]
+    then
+        start_container "$start_sso:443" "sso_server" '' '' "arvados/sso"
+    fi
+
+    if [[ $start_api != false ]]
+    then
+      if [[ $start_sso != false ]]; then
+        start_container "$start_api:443" "api_server" '' "sso_server:sso" "arvados/api"
+      else
+        start_container "$start_api:443" "api_server" '' '' "arvados/api"
+      fi
+    fi
+
     if [[ $start_compute != false ]]
     then
         for i in `seq 0 $(($start_compute - 1))`; do
@@ -266,20 +297,23 @@ function do_start {
         # create `keep_volumes' array with a list of keep mount points
         # remove any stale metadata from those volumes before starting them
         make_keep_volumes
-        for v in ${keep_volumes[*]}
-        do
-            [ -f $v/keep/.metadata.yml ] && sudo rm $v/keep/.metadata.yml
-        done
         start_container "25107:25107" "keep_server_0" \
-            "${keep_volumes[0]}:/keep-data" \
+            "keep_data" \
             "api_server:api" \
             "arvados/keep"
         start_container "25108:25107" "keep_server_1" \
-            "${keep_volumes[1]}:/keep-data" \
+            "keep_data" \
             "api_server:api" \
             "arvados/keep"
     fi
 
+    if [[ $start_keepproxy != false ]]
+    then
+        start_container "9902:9100" "keepproxy_server" '' \
+            "api_server:api" \
+            "arvados/keepproxy"
+    fi
+
     if [[ $start_doc != false ]]
     then
         start_container "$start_doc:80" "doc_server" '' '' "arvados/doc"
@@ -292,7 +326,7 @@ function do_start {
 
     if [[ $start_workbench != false ]]
     then
-        start_container "$start_workbench:80" "workbench_server" '' "api_server:api" "arvados/workbench"
+        start_container "" "workbench_server" '' "" "arvados/workbench"
     fi
 
     if [[ $start_api != false ]]
@@ -309,6 +343,42 @@ EOF
           fi
         fi
     fi
+
+    if [ "$(awk '($1 == "nameserver"){print $2; exit}' </etc/resolv.conf)" != "$bridge_ip" ]; then
+        echo
+        echo "******************************************************************"
+        echo "To access Arvados you must add the Arvados nameserver to the top"
+        echo "of your DNS configuration in /etc/resolv.conf:"
+        echo "nameserver $bridge_ip"
+        echo
+        echo "Then run '$0 start' again"
+        echo "******************************************************************"
+        echo
+    else
+        while ! $CURL -k -L -f http://workbench.$ARVADOS_DOMAIN >/dev/null 2>/dev/null ; do
+            echo "Waiting for Arvados to be ready."
+            sleep 1
+        done
+
+        `$DOCKER ps |grep -E "\bdoc_server\b" -q`
+        if [[ "$?" == "0" ]]; then
+            echo
+            echo "******************************************************************"
+            echo "You can access the Arvados documentation at http://doc.$ARVADOS_DOMAIN"
+            echo "******************************************************************"
+            echo
+        fi
+
+        `$DOCKER ps |grep -E "\bworkbench_server\b" -q`
+        if [[ "$?" == "0" ]]; then
+            echo
+            echo "********************************************************************"
+            echo "You can access the Arvados workbench at http://workbench.$ARVADOS_DOMAIN"
+            echo "********************************************************************"
+            echo
+        fi
+    fi
+
 }
 
 function do_stop {
@@ -320,10 +390,11 @@ function do_stop {
     local stop_nameserver=""
     local stop_vm=""
     local stop_keep=""
+    local stop_keepproxy=""
 
     # NOTE: This requires GNU getopt (part of the util-linux package on Debian-based distros).
-    local TEMP=`getopt -o dsacwnkvh \
-                  --long doc,sso,api,compute,workbench,nameserver,keep,vm,help \
+    local TEMP=`getopt -o dsacwnkpvh \
+                  --long doc,sso,api,compute,workbench,nameserver,keep,keepproxy,vm,help,domain:: \
                   -n "$0" -- "$@"`
 
     if [ $? != 0 ] ; then echo "Use -h for help"; exit 1 ; fi
@@ -341,7 +412,7 @@ function do_stop {
             -a | --api)
                 stop_api=api_server ; shift ;;
             -c | --compute)
-                stop_compute=`$DOCKER ps |grep -P "compute\d+" |grep -v api_server |cut -f1 -d ' '` ; shift ;;
+                stop_compute=`$DOCKER ps |grep -E "\bcompute[0-9]+\b" |grep -v api_server |cut -f1 -d ' '` ; shift ;;
             -w | --workbench)
                 stop_workbench=workbench_server ; shift ;;
             -n | --nameserver )
@@ -350,6 +421,13 @@ function do_stop {
                 stop_vm="shell" ; shift ;;
             -k | --keep )
                 stop_keep="keep_server_0 keep_server_1" ; shift ;;
+            -p | --keepproxy )
+                stop_keep="keepproxy_server" ; shift ;;
+            --domain)
+                case "$2" in
+                    *) ARVADOS_DOMAIN="$2"; shift 2 ;;
+                esac
+                ;;
             --)
                 shift
                 break
@@ -369,19 +447,21 @@ function do_stop {
           $stop_workbench == "" &&
           $stop_vm == "" &&
           $stop_nameserver == "" &&
-          $stop_keep == "" ]]
+          $stop_keep == "" &&
+          $stop_keepproxy == "" ]]
     then
         stop_doc=doc_server
         stop_sso=sso_server
         stop_api=api_server
-        stop_compute=`$DOCKER ps |grep -P "compute\d+" |grep -v api_server |cut -f1 -d ' '`
+        stop_compute=`$DOCKER ps |grep -E "\bcompute[0-9]+\b" |grep -v api_server |cut -f1 -d ' '`
         stop_workbench=workbench_server
         stop_vm=shell
         stop_nameserver="skydock skydns"
         stop_keep="keep_server_0 keep_server_1"
+        stop_keepproxy="keepproxy_server"
     fi
 
-    $DOCKER stop $stop_doc $stop_sso $stop_api $stop_compute $stop_workbench $stop_nameserver $stop_keep $stop_vm \
+    $DOCKER stop $stop_doc $stop_sso $stop_api $stop_compute $stop_workbench $stop_nameserver $stop_keep $stop_keepproxy $stop_vm \
         2>/dev/null
 }
 
@@ -416,6 +496,34 @@ function do_test {
     done
 }
 
+function do_reset {
+    for name in skydock skydns workbench_server shell doc_server keepproxy_server keep_server_0 keep_server_1 compute0 compute1 api_server keepproxy keep_data sso_server
+    do
+        `$DOCKER ps |grep -E "\b$name\b" -q`
+        if [[ "$?" == "0" ]]; then
+            echo "  $DOCKER stop $name"
+            $DOCKER stop $name
+        fi
+        `$DOCKER ps --all |grep -E "\b$name\b" -q`
+        if [[ "$?" == "0" ]]; then
+            echo "  $DOCKER rm $name"
+            $DOCKER rm $name
+        fi
+    done
+}
+
+if [ "$DOCKER" == '' ]
+then
+  echo "Docker not found. Please install it first."
+  exit 2
+fi
+
+if [ "$CURL" == '' ]
+then
+  echo "Curl not found. Please install it first."
+  exit 3
+fi
+
 if [ $# -lt 1 ]
 then
   usage
@@ -440,6 +548,10 @@ case $1 in
         shift
         do_test $@
         ;;
+    reset)
+        shift
+        do_reset $@
+        ;;
     *)
         usage
         exit 1
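Putting the new options together, a typical invocation might look like the following (the wrapper's filename is not shown in this diff, so ./arvdock is an assumption; the flag values are the script's own defaults):

    # start the full stack, naming the bridge and DNS domain explicitly
    ./arvdock start --bridge=docker0 --domain=dev.arvados
    # stop and delete all containers and the keep_data volume (see the reset warning)
    ./arvdock reset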
index c4b744ba5cb509195df061f9891b7934ba3b6d2e..5eeabc8835af13db94c3e9f59e29ff94a2367aae 100644 (file)
@@ -2,7 +2,7 @@
 
 # Based on Debian Wheezy
 FROM arvados/debian:wheezy
-MAINTAINER Tim Pierce <twp@curoverse.com>
+MAINTAINER Ward Vandewege <ward@curoverse.com>
 
 ENV DEBIAN_FRONTEND noninteractive
 
@@ -12,11 +12,17 @@ ENV DEBIAN_FRONTEND noninteractive
 
 ADD apt.arvados.org.list /etc/apt/sources.list.d/
 RUN apt-key adv --keyserver pool.sks-keyservers.net --recv 1078ECD7
-RUN apt-get update -qq
-
-RUN apt-get install -qqy apt-utils git curl \
+RUN apt-get update -q
+
+## 2015-06-29 nico
+## KNOWN BUG: python-oauth2client needs specific versions
+## (python-pyasn1=0.1.7, python-pyasn1-modules=0.0.5),
+## but apt-get doesn't resolve them correctly. We have to
+## pin them by hand here (or add apt_preferences if it gets too hairy).
+RUN apt-get install -qy apt-utils git curl \
              libcurl3 libcurl3-gnutls libcurl4-openssl-dev locales \
-             postgresql-server-dev-9.1 python-arvados-python-client
+             postgresql-server-dev-9.1 python-arvados-python-client \
+             python-google-api-python-client python-oauth2client python-pyasn1=0.1.7 python-pyasn1-modules=0.0.5
 
 RUN gpg --keyserver pool.sks-keyservers.net --recv-keys D39DC0E3 && \
     /bin/sed -ri 's/# en_US.UTF-8 UTF-8/en_US.UTF-8 UTF-8/' /etc/locale.gen && \
@@ -31,7 +37,7 @@ ADD generated/arvados.tar.gz /usr/src/arvados/
 # Update gem. This (hopefully) fixes
 # https://github.com/rubygems/rubygems.org/issues/613.
 RUN /usr/local/rvm/bin/rvm-exec default gem update --system && \
-    /usr/local/rvm/bin/rvm-exec default gem install bundler && \
+    /usr/local/rvm/bin/rvm-exec default gem install bundler -v 1.9.9 && \
     /usr/local/rvm/bin/rvm-exec default bundle install --gemfile=/usr/src/arvados/apps/workbench/Gemfile && \
     /usr/local/rvm/bin/rvm-exec default bundle install --gemfile=/usr/src/arvados/services/api/Gemfile && \
     /usr/local/rvm/bin/rvm-exec default bundle install --gemfile=/usr/src/arvados/doc/Gemfile
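The KNOWN BUG note above pins the pyasn1 packages directly on the install line. The apt_preferences alternative it mentions would look roughly like this (a sketch, not part of this change):

    # pin python-pyasn1 so apt-get resolves the version python-oauth2client needs
    cat > /etc/apt/preferences.d/pyasn1 <<'EOF'
    Package: python-pyasn1
    Pin: version 0.1.7
    Pin-Priority: 1001
    EOF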
index 9d93d2bdef7099413e3ca32a34b70ee2e0141163..75702960133b3be2555b851babb9d5bf92e4c9a3 100644 (file)
@@ -4,7 +4,7 @@ OLD_SHELL := $(SHELL)
 SHELL = $(warning [$@])$(OLD_SHELL) -x
 endif
 
-all: skydns-image skydock-image api-image compute-image doc-image workbench-image keep-image sso-image shell-image
+all: skydns-image skydock-image api-image compute-image doc-image workbench-image keep-image keep-proxy-image sso-image shell-image
 
 IMAGE_FILES := $(shell ls *-image 2>/dev/null |grep -v -E 'debian-arvados-image|skydns-image|skydock-image')
 GENERATED_DIRS := $(shell ls */generated 2>/dev/null)
@@ -19,9 +19,15 @@ clean:
 
 DEBIAN_IMAGE := $(shell $(DOCKER) images -q arvados/debian |head -n1)
 
-REALCLEAN_CONTAINERS := $(shell $(DOCKER) ps -a |grep -e arvados -e api_server -e keep_server -e doc_server -e workbench_server |cut -f 1 -d' ')
-REALCLEAN_IMAGES := $(shell $(DOCKER) images -q arvados/* |grep -v $(DEBIAN_IMAGE) 2>/dev/null)
-DEEPCLEAN_IMAGES := $(shell $(DOCKER) images -q arvados/*)
+REALCLEAN_CONTAINERS := $(shell $(DOCKER) ps -a |grep -e arvados -e api_server -e keep_server -e keep_proxy_server -e doc_server -e workbench_server |cut -f 1 -d' ')
+# Generate a list of docker images tagged as arvados/*
+# but exclude those tagged as arvados/build
+ADI_TEMPFILE := $(shell mktemp)
+ARVADOS_DOCKER_IMAGES := $(shell $(DOCKER) images -q arvados/* |sort > $(ADI_TEMPFILE))
+ABDI_TEMPFILE := $(shell mktemp)
+ARVADOS_BUILD_DOCKER_IMAGES := $(shell $(DOCKER) images -q arvados/build |sort > $(ABDI_TEMPFILE))
+REALCLEAN_IMAGES := $(shell comm -3 $(ADI_TEMPFILE) $(ABDI_TEMPFILE) |grep -v $(DEBIAN_IMAGE) 2>/dev/null)
+DEEPCLEAN_IMAGES := $(shell comm -3 $(ADI_TEMPFILE) $(ABDI_TEMPFILE))
 SKYDNS_CONTAINERS := $(shell $(DOCKER) ps -a |grep -e crosbymichael/skydns -e crosbymichael/skydock |cut -f 1 -d' ')
 SKYDNS_IMAGES := $(shell $(DOCKER) images -q crosbymichael/skyd*)
 
@@ -70,7 +76,7 @@ SHELL_DEPS = shell/* config.yml $(SHELL_GENERATED)
 
 COMPUTE_DEPS = compute/* config.yml $(COMPUTE_GENERATED)
 
-DOC_DEPS = doc/Dockerfile doc/apache2_vhost
+DOC_DEPS = doc/Dockerfile $(DOC_GENERATED)
 
 WORKBENCH_DEPS = workbench/Dockerfile \
                  config.yml \
@@ -78,6 +84,8 @@ WORKBENCH_DEPS = workbench/Dockerfile \
 
 KEEP_DEPS = keep/Dockerfile config.yml $(KEEP_GENERATED)
 
+KEEP_PROXY_DEPS = keepproxy/Dockerfile config.yml $(KEEP_PROXY_GENERATED)
+
 SSO_DEPS = config.yml $(SSO_GENERATED)
 
 BCBIO_NEXTGEN_DEPS = bcbio-nextgen/Dockerfile
@@ -90,6 +98,9 @@ COMPUTE_GENERATED      = compute/generated/*
 KEEP_GENERATED_IN      = keep/*.in
 KEEP_GENERATED         = keep/generated/*
 
+KEEP_PROXY_GENERATED_IN      = keepproxy/*.in
+KEEP_PROXY_GENERATED         = keepproxy/generated/*
+
 API_GENERATED_IN       = api/*.in
 API_GENERATED          = api/generated/*
 
@@ -105,6 +116,9 @@ WORKBENCH_GENERATED    = workbench/generated/*
 SSO_GENERATED_IN       = sso/*.in
 SSO_GENERATED          = sso/generated/*
 
+DOC_GENERATED_IN       = doc/*.in
+DOC_GENERATED          = doc/generated/*
+
 KEEP_DEPS += keep/generated/bin/keepproxy
 KEEP_DEPS += keep/generated/bin/keepstore
 keep/generated/bin/%: $(wildcard build/services/%/*.go)
@@ -112,6 +126,12 @@ keep/generated/bin/%: $(wildcard build/services/%/*.go)
        ln -sfn ../../../../.. keep/generated/src/git.curoverse.com/arvados.git
        GOPATH=$(shell pwd)/keep/generated go get $(@:keep/generated/bin/%=git.curoverse.com/arvados.git/services/%)
 
+KEEP_PROXY_DEPS += keepproxy/generated/bin/keepproxy
+keepproxy/generated/bin/%: $(wildcard build/services/%/*.go)
+       mkdir -p keepproxy/generated/src/git.curoverse.com
+       ln -sfn ../../../../.. keepproxy/generated/src/git.curoverse.com/arvados.git
+       GOPATH=$(shell pwd)/keepproxy/generated go get $(@:keepproxy/generated/bin/%=git.curoverse.com/arvados.git/services/%)
+
 $(BUILD):
        mkdir -p build
        rsync -rlp --exclude=docker/ --exclude='**/log/*' --exclude='**/tmp/*' \
@@ -147,9 +167,15 @@ $(COMPUTE_GENERATED): $(COMPUTE_GENERATED_IN)
 $(SSO_GENERATED): $(SSO_GENERATED_IN)
        $(CONFIG_RB) sso
 
+$(DOC_GENERATED): $(DOC_GENERATED_IN)
+       $(CONFIG_RB) doc
+
 $(KEEP_GENERATED): $(KEEP_GENERATED_IN)
        $(CONFIG_RB) keep
 
+$(KEEP_PROXY_GENERATED): $(KEEP_PROXY_GENERATED_IN)
+       $(CONFIG_RB) keepproxy
+
 DOCKER_BUILD = $(DOCKER) build --rm=true
 
 # ============================================================
@@ -185,7 +211,12 @@ keep-image: debian-arvados-image $(BUILD) $(KEEP_DEPS)
        $(DOCKER_BUILD) -t arvados/keep keep
        date >keep-image
 
-jobs-image: base-image $(BUILD) $(JOBS_DEPS)
+keep-proxy-image: debian-arvados-image $(BUILD) $(KEEP_PROXY_DEPS)
+       @echo "Building keep-proxy-image"
+       $(DOCKER_BUILD) -t arvados/keepproxy keepproxy
+       date >keep-proxy-image
+
+jobs-image: debian-arvados-image $(BUILD) $(JOBS_DEPS)
        $(DOCKER_BUILD) -t arvados/jobs jobs
        date >jobs-image
 
@@ -236,7 +267,7 @@ base-image: debian-arvados-image $(BASE_DEPS)
 
 debian-arvados-image:
        @echo "Building debian-arvados-image"
-       ./mkimage-debootstrap.sh arvados/debian wheezy ftp://ftp.us.debian.org/debian/
+       ./mkimage-debootstrap.sh arvados/debian wheezy http://ftp.us.debian.org/debian/
        date >debian-arvados-image
 
 skydns-image:
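A note on the comm(1) construction above: the two image lists are sorted into temp files, and comm -3 suppresses the lines common to both, leaving only the IDs tagged arvados/* but not arvados/build. The same trick in isolation:

    # prints image IDs present in the first list but not the second
    docker images -q 'arvados/*' | sort > /tmp/all.txt
    docker images -q arvados/build | sort > /tmp/build.txt
    comm -3 /tmp/all.txt /tmp/build.txt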
index e8f58097d864610e2641142584f061c9c94cefc2..e3309a922982647574f266ae7bfbe8d3b0894f58 100755 (executable)
@@ -51,38 +51,38 @@ def main options
   # Generate a config.yml if it does not exist or is empty
   if not File.size? 'config.yml'
     print "Generating config.yml.\n"
-    print "Arvados needs to know the email address of the administrative user,\n"
-    print "so that when that user logs in they are automatically made an admin.\n"
-    print "This should be an email address associated with a Google account.\n"
-    print "\n"
-    admin_email_address = ""
-    until is_valid_email? admin_email_address
-      print "Enter your Google ID email address here: "
-      admin_email_address = gets.strip
-      if not is_valid_email? admin_email_address
-        print "That doesn't look like a valid email address. Please try again.\n"
-      end
-    end
-
-    print "Arvados needs to know the shell login name for the administrative user.\n"
-    print "This will also be used as the name for your git repository.\n"
-    print "\n"
-    user_name = ""
-    until is_valid_user_name? user_name
-      print "Enter a shell login name here: "
-      user_name = gets.strip
-      if not is_valid_user_name? user_name
-        print "That doesn't look like a valid shell login name. Please try again.\n"
-      end
-    end
+    print "Arvados needs to know the email address of the administrative user,\n"
+    print "so that when that user logs in they are automatically made an admin.\n"
+    print "This should be an email address associated with a Google account.\n"
+    print "\n"
+    admin_email_address = ""
+    until is_valid_email? admin_email_address
+      print "Enter your Google ID email address here: "
+      admin_email_address = gets.strip
+      if not is_valid_email? admin_email_address
+        print "That doesn't look like a valid email address. Please try again.\n"
+      end
+    end
+
+    print "Arvados needs to know the shell login name for the administrative user.\n"
+    print "This will also be used as the name for your git repository.\n"
+    print "\n"
+    user_name = ""
+    until is_valid_user_name? user_name
+      print "Enter a shell login name here: "
+      user_name = gets.strip
+      if not is_valid_user_name? user_name
+        print "That doesn't look like a valid shell login name. Please try again.\n"
+      end
+    end
 
     File.open 'config.yml', 'w' do |config_out|
       config_out.write "# If a _PW or _SECRET variable is set to an empty string, a password\n"
       config_out.write "# will be chosen randomly at build time. This is the\n"
       config_out.write "# recommended setting.\n\n"
       config = YAML.load_file 'config.yml.example'
-      config['API_AUTO_ADMIN_USER'] = admin_email_address
-      config['ARVADOS_USER_NAME'] = user_name
+      #config['API_AUTO_ADMIN_USER'] = admin_email_address
+      #config['ARVADOS_USER_NAME'] = user_name
       config['API_HOSTNAME'] = generate_api_hostname
       config['API_WORKBENCH_ADDRESS'] = 'false'
       config.each_key do |var|
index 462115cab0d8b07d7759da951b186e143663acdf..f2f48da79453f8568442be224bded0210a91a677 100644 (file)
@@ -3,8 +3,8 @@
 FROM arvados/slurm
 MAINTAINER Ward Vandewege <ward@curoverse.com>
 
-RUN apt-get update -qq
-RUN apt-get install -qqy supervisor python-pip python-pyvcf python-gflags python-google-api-python-client python-virtualenv libattr1-dev libfuse-dev python-dev python-llfuse fuse crunchstat python-arvados-fuse cron dnsmasq
+RUN apt-get update -q
+RUN apt-get install -qy supervisor python-pip python-pyvcf python-gflags python-google-api-python-client python-virtualenv libattr1-dev libfuse-dev python-dev python-llfuse fuse crunchstat python-arvados-fuse cron dnsmasq
 
 ADD fuse.conf /etc/fuse.conf
 RUN chmod 644 /etc/fuse.conf
@@ -12,7 +12,7 @@ RUN chmod 644 /etc/fuse.conf
 RUN /usr/local/rvm/bin/rvm-exec default gem install arvados-cli arvados
 
 # Install Docker from the Arvados package repository (cf. arvados/base)
-RUN apt-get install -qqy iptables ca-certificates lxc apt-transport-https docker.io
+RUN apt-get install -qy iptables ca-certificates lxc apt-transport-https docker.io
 
 RUN addgroup --gid 4005 crunch && mkdir /home/crunch && useradd --uid 4005 --gid 4005 crunch && usermod crunch -G fuse,docker && chown crunch:crunch /home/crunch
 
@@ -20,6 +20,7 @@ RUN addgroup --gid 4005 crunch && mkdir /home/crunch && useradd --uid 4005 --gid
 ADD supervisor.conf /etc/supervisor/conf.d/arvados.conf
 ADD generated/setup.sh /usr/local/bin/setup.sh
 ADD wrapdocker /usr/local/bin/wrapdocker.sh
+ADD munge.sh /usr/local/bin/munge.sh
 
 VOLUME /var/lib/docker
 # Start the supervisor.
diff --git a/docker/compute/munge.sh b/docker/compute/munge.sh
new file mode 100755 (executable)
index 0000000..ef10d01
--- /dev/null
@@ -0,0 +1,3 @@
+#!/bin/sh
+rm -rf /var/run/munge
+exec /etc/init.d/munge start
index e107d80bdd7debd2ecd945d2fffdbe954e8a0ea9..efb2c4148e385a412b9e1f6bfa387e7181d623e1 100755 (executable)
@@ -5,16 +5,23 @@
 export ARVADOS_API_HOST=api
 export ARVADOS_API_HOST_INSECURE=yes
 export ARVADOS_API_TOKEN=@@API_SUPERUSER_SECRET@@
+export HOME=/root
 
-arv node create --node {} > /tmp/node.json
+# Wait for API server to come up.
+while ! arv user current ; do sleep 1 ; done
 
-UUID=`grep \"uuid\" /tmp//node.json  |cut -f4 -d\"`
-PING_SECRET=`grep \"ping_secret\" /tmp//node.json  |cut -f4 -d\"`
+if ! test -f /root/node.json ; then
+    arv node create --node "{\"hostname\": \"$(hostname)\"}" > /root/node.json
 
-echo "*/5 * * * * root /usr/bin/curl -k -d ping_secret=$PING_SECRET https://api/arvados/v1/nodes/$UUID/ping" > /etc/cron.d/node_ping
+    # Make sure /dev/fuse permissions are correct (the device appears after fuse is loaded)
+    chmod 1660 /dev/fuse && chgrp fuse /dev/fuse
+fi
 
-# Send a ping now
-/usr/bin/curl -k -d ping_secret=$PING_SECRET https://api/arvados/v1/nodes/$UUID/ping?ping_secret=$PING_SECRET
+UUID=`grep \"uuid\" /root/node.json  |cut -f4 -d\"`
+PING_SECRET=`grep \"ping_secret\" /root/node.json  |cut -f4 -d\"`
+
+if ! test -f /etc/cron.d/node_ping ; then
+    echo "*/5 * * * * root /usr/bin/curl -k -d ping_secret=$PING_SECRET https://api/arvados/v1/nodes/$UUID/ping" > /etc/cron.d/node_ping
+fi
 
-# Just make sure /dev/fuse permissions are correct (the device appears after fuse is loaded)
-chmod 1660 /dev/fuse && chgrp fuse /dev/fuse
+/usr/bin/curl -k -d ping_secret=$PING_SECRET https://api/arvados/v1/nodes/$UUID/ping?ping_secret=$PING_SECRET
index 615e55a953606e948f7628d68b6ab8e37bebd390..b3c715b5a9d08a9c8133f61029aa2b26d08e7852 100644 (file)
@@ -1,6 +1,6 @@
 [program:munge]
 user=root
-command=/etc/init.d/munge start
+command=/usr/local/bin/munge.sh
 startsecs=0
 
 [program:slurm]
@@ -26,4 +26,3 @@ command=/usr/local/bin/wrapdocker.sh
 user=root
 command=/etc/init.d/dnsmasq start
 startsecs=0
-
index 4210ec3e161ad2b1fbaa355d9f6b249a741bf00b..f40c0fe8b43f69c99db7b49abaccdaa7066c0d3d 100644 (file)
@@ -7,7 +7,7 @@ ARVADOS_USER_NAME:
 
 # ARVADOS_DOMAIN: the Internet domain of this installation.
 # ARVADOS_DNS_SERVER: the authoritative nameserver for ARVADOS_DOMAIN.
-ARVADOS_DOMAIN:         # e.g. arvados.internal
+ARVADOS_DOMAIN: dev.arvados
 ARVADOS_DNS_SERVER:     # e.g. 192.168.0.1
 
 # ==============================
@@ -79,8 +79,6 @@ WORKBENCH_VCF_PIPELINE_UUID:
 WORKBENCH_SITE_NAME: Arvados Workbench
 WORKBENCH_INSECURE_HTTPS: true
 WORKBENCH_ACTIVATION_CONTACT_LINK: mailto:arvados@curoverse.com
-WORKBENCH_ARVADOS_LOGIN_BASE: https://@@API_HOSTNAME@@.@@ARVADOS_DOMAIN@@/login
-WORKBENCH_ARVADOS_V1_BASE: https://@@API_HOSTNAME@@.@@ARVADOS_DOMAIN@@/arvados/v1
 WORKBENCH_SECRET:
 
 # ==============================
@@ -89,11 +87,5 @@ WORKBENCH_SECRET:
 SSO_HOSTNAME: sso
 SSO_SECRET:
 SSO_CLIENT_NAME: devsandbox
-# ==============================
-# Default to using auth.curoverse.com as SSO server
-# To use your a local Docker SSO server, set OMNIAUTH_URL and SSO_CLIENT_SECRET
-# to the empty string
-# ==============================
-OMNIAUTH_URL: https://auth.curoverse.com
 SSO_CLIENT_APP_ID: local_docker_installation
-SSO_CLIENT_SECRET: yohbai4eecohshoo1Yoot7tea9zoca9Eiz3Tajahweo9eePaeshaegh9meiye2ph
+SSO_CLIENT_SECRET:
index aa51a389c2b6ccfa13524fdaf1544981f84c20d3..1492675275d1cb87ee2172cda186c0400c6493e4 100644 (file)
@@ -5,8 +5,8 @@ maintainer Ward Vandewege <ward@curoverse.com>
 
 # Install packages
 RUN /bin/mkdir -p /usr/src/arvados && \
-    apt-get update -qq && \
-    apt-get install -qqy curl procps apache2-mpm-worker
+    apt-get update -q && \
+    apt-get install -qy curl procps apache2-mpm-worker
 
 ADD generated/doc.tar.gz /usr/src/arvados/
 
@@ -14,10 +14,11 @@ ADD generated/doc.tar.gz /usr/src/arvados/
 RUN /usr/local/rvm/bin/rvm-exec default bundle install --gemfile=/usr/src/arvados/doc/Gemfile && \
     /bin/sed -ri 's/^baseurl: .*$/baseurl: /' /usr/src/arvados/doc/_config.yml && \
     cd /usr/src/arvados/doc && \
-    LANG="en_US.UTF-8" LC_ALL="en_US.UTF-8" /usr/local/rvm/bin/rvm-exec default bundle exec rake
+    LANG="en_US.UTF-8" LC_ALL="en_US.UTF-8" /usr/local/rvm/bin/rvm-exec default bundle exec rake generate arvados_api_host=api.dev.arvados arvados_workbench_host=workbench.dev.arvados
+
 
 # Configure Apache
-ADD apache2_vhost /etc/apache2/sites-available/doc
+ADD generated/apache2_vhost /etc/apache2/sites-available/doc
 RUN \
   a2dissite default && \
   a2ensite doc
similarity index 62%
rename from docker/doc/apache2_vhost
rename to docker/doc/apache2_vhost.in
index 3a077760b188577e4891fea00554827af06de629..76da6d0067cd5ae6910f37f06e2de1afc4a01c5b 100644 (file)
@@ -1,12 +1,11 @@
 
-ServerName doc.arvados.org
+ServerName doc.@@ARVADOS_DOMAIN@@
 
 <VirtualHost *:80>
   ServerAdmin sysadmin@curoverse.com
 
-  ServerName doc.arvados.org
+  ServerName doc.@@ARVADOS_DOMAIN@@
 
   DocumentRoot /usr/src/arvados/doc/.site/
 
 </VirtualHost>
-
index 713ef2116a6fe7a7936ce6abc34877f3fc3de348..2a73977c525aa74cb22d81ff320fd44f22031f69 100644 (file)
@@ -3,8 +3,8 @@ MAINTAINER Peter Amstutz <peter.amstutz@curoverse.com>
 
 USER root
 
-RUN apt-get update -qq
-RUN apt-get install -qqy openjdk-7-jre-headless && \
+RUN apt-get update -q
+RUN apt-get install -qy openjdk-7-jre-headless && \
     cd /tmp && \
     curl --location http://cache.arvados.org/sourceforge.net/project/bio-bwa/bwa-0.7.9a.tar.bz2 -o bwa-0.7.9a.tar.bz2 && \
     tar xjf bwa-0.7.9a.tar.bz2 && \
index 313dd3662a2e80192db76e69e3243f9d56a2b90f..41e4aea1ddcf0f4b69a6735b72fcbecf7b942a72 100644 (file)
@@ -1,20 +1,21 @@
-FROM arvados/base
-MAINTAINER Brett Smith <brett@curoverse.com>
+# Based on Debian Wheezy
+FROM arvados/debian:wheezy
+MAINTAINER Ward Vandewege <ward@curoverse.com>
+
+ENV DEBIAN_FRONTEND noninteractive
+
+ADD apt.arvados.org.list /etc/apt/sources.list.d/
+RUN apt-key adv --keyserver pool.sks-keyservers.net --recv 1078ECD7
+RUN apt-get update -q
+
+RUN apt-get install -qy git python-minimal python-virtualenv python-arvados-python-client
+
+RUN gpg --keyserver pool.sks-keyservers.net --recv-keys D39DC0E3
 
 # Install dependencies and set up system.
 # The FUSE packages help ensure that we can install the Python SDK (arv-mount).
-RUN /usr/bin/apt-get install -q -y \
-      python-dev python-llfuse python-pip python-virtualenv \
-      libio-socket-ssl-perl libjson-perl liburi-perl libwww-perl dtrx \
-      fuse libattr1-dev libfuse-dev && \
-    /usr/sbin/adduser --disabled-password \
+RUN /usr/sbin/adduser --disabled-password \
       --gecos 'Crunch execution user' crunch && \
-    /usr/bin/install --directory --owner=crunch --group=crunch --mode=0700 /keep /tmp/crunch-src /tmp/crunch-job && \
-    /bin/ln -s /usr/src/arvados /usr/local/src/arvados
-
-# Install Arvados packages.
-RUN (find /usr/src/arvados/sdk -name '*.gem' -print0 | \
-      xargs -0rn 1 /usr/local/rvm/bin/rvm-exec default gem install) && \
-     apt-get -qqy install python-arvados-fuse
+    /usr/bin/install --directory --owner=crunch --group=crunch --mode=0700 /keep /tmp/crunch-src /tmp/crunch-job
 
 USER crunch
diff --git a/docker/jobs/apt.arvados.org.list b/docker/jobs/apt.arvados.org.list
new file mode 100644 (file)
index 0000000..7eb8716
--- /dev/null
@@ -0,0 +1,2 @@
+# apt.arvados.org
+deb http://apt.arvados.org/ wheezy main
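The jobs image now pulls the Arvados Python client from apt.arvados.org instead of building SDK gems locally. The same setup on a plain Wheezy host would look roughly like this (key ID copied from the Dockerfile above):

    echo 'deb http://apt.arvados.org/ wheezy main' \
      > /etc/apt/sources.list.d/apt.arvados.org.list
    apt-key adv --keyserver pool.sks-keyservers.net --recv 1078ECD7
    apt-get update -q
    apt-get install -qy python-arvados-python-client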
index cd40a72f5b236d9972c710dd30c656a21eaa1607..08e5175ecd810b79f9f0731693cf0c544d0e47d0 100644 (file)
@@ -8,5 +8,7 @@ ADD generated/run-keep /usr/local/bin/
 
 ADD generated/keep_signing_secret /etc/
 
+RUN mkdir /keep-data
+
 # Start keep
 CMD ["/usr/local/bin/run-keep"]
index a0b4cb0d2cb57101f9b287c83fffb2b9e34a7a20..385f0e6c04cb28ce05ef2f17df174dbc6d52cdce 100755 (executable)
@@ -8,4 +8,4 @@ else
     permission_args=""
 fi
 
-exec keepstore $permission_args -listen=":25107" -volumes="/keep-data"
+exec keepstore $permission_args -listen=":25107" -volume="/keep-data"
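Note the flag change from -volumes to -volume: this keepstore build takes one storage directory per -volume flag (matching the mkdir /keep-data added above). A multi-volume invocation would presumably repeat the flag; a sketch, with the second path assumed:

    exec keepstore $permission_args -listen=":25107" \
        -volume="/keep-data" -volume="/keep-data2"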
diff --git a/docker/keepproxy/Dockerfile b/docker/keepproxy/Dockerfile
new file mode 100644 (file)
index 0000000..e8df168
--- /dev/null
@@ -0,0 +1,12 @@
+# Based on Debian Wheezy
+FROM arvados/debian:wheezy
+MAINTAINER Ward Vandewege <ward@curoverse.com>
+
+RUN apt-get update -q
+RUN apt-get install -qy ca-certificates
+
+ADD generated/bin/keepproxy /usr/local/bin/
+ADD generated/run-keepproxy /usr/local/bin/
+
+# Start keep
+CMD ["/usr/local/bin/run-keepproxy"]
diff --git a/docker/keepproxy/run-keepproxy.in b/docker/keepproxy/run-keepproxy.in
new file mode 100755 (executable)
index 0000000..4bd934d
--- /dev/null
@@ -0,0 +1,15 @@
+#!/bin/sh
+
+export ARVADOS_API_HOST=api
+export ARVADOS_API_HOST_INSECURE=yes
+# This should be an anonymous token, but we don't have a good way
+# to get one while building the images
+export ARVADOS_API_TOKEN=@@API_SUPERUSER_SECRET@@
+
+read pid cmd state ppid pgrp session tty_nr tpgid rest < /proc/self/stat
+trap "kill -TERM -$pgrp; exit" HUP EXIT TERM QUIT
+
+while /bin/true ; do
+    keepproxy -listen=':9100'
+    sleep 1
+done
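The run script expects the API server to resolve as the bare hostname "api". With the Docker links mechanism these images are built around, wiring that up would look roughly like this (container and image names assumed):

    docker run --name keepproxy --link api:api -p 9100:9100 arvados/keepproxy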
index b4010ef4d69e5603d417763bed611fae1f0b4832..2ad79ef856009f647e768af1589c62a8c9bbc13a 100755 (executable)
@@ -118,7 +118,7 @@ fi
 # will be filled in later, if [ -z "$skipDetection" ]
 lsbDist=''
 
-target="/tmp/docker-rootfs-debootstrap-$suite-$$-$RANDOM"
+target="${TMPDIR:-/tmp}/docker-rootfs-debootstrap-$suite-$$-$RANDOM"
 
 cd "$(dirname "$(readlink -f "$BASH_SOURCE")")"
 returnTo="$(pwd -P)"
@@ -201,32 +201,33 @@ else
        sudo tar --numeric-owner -c . | $docker import - $repo:$suite
        
        # test the image
-       $docker run -i -t $repo:$suite echo success
+       [[ "$(/usr/bin/tty || true)" != "not a tty" ]] && RUN_OPTS="-i -t"
+       $docker run $RUN_OPTS $repo:$suite echo success
        
        if [ -z "$skipDetection" ]; then
                case "$lsbDist" in
                        Debian)
                                if [ "$suite" = "$debianStable" -o "$suite" = 'stable' ] && [ -r etc/debian_version ]; then
                                        # tag latest
-                                       $docker tag $repo:$suite $repo:latest
+                                       $docker tag -f $repo:$suite $repo:latest
                                        
                                        if [ -r etc/debian_version ]; then
                                                # tag the specific debian release version (which is only reasonable to tag on debian stable)
                                                ver=$(cat etc/debian_version)
-                                               $docker tag $repo:$suite $repo:$ver
+                                               $docker tag -f $repo:$suite $repo:$ver
                                        fi
                                fi
                                ;;
                        Ubuntu)
                                if [ "$suite" = "$ubuntuLatestLTS" ]; then
                                        # tag latest
-                                       $docker tag $repo:$suite $repo:latest
+                                       $docker tag -f $repo:$suite $repo:latest
                                fi
                                if [ -r etc/lsb-release ]; then
                                        lsbRelease="$(. etc/lsb-release && echo "$DISTRIB_RELEASE")"
                                        if [ "$lsbRelease" ]; then
                                                # tag specific Ubuntu version number, if available (12.04, etc.)
-                                               $docker tag $repo:$suite $repo:$lsbRelease
+                                               $docker tag -f $repo:$suite $repo:$lsbRelease
                                        fi
                                fi
                                ;;
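The image test above only passes -i -t to docker run when a terminal is attached, so the build also works non-interactively (cron, CI). The same guard in isolation, sketched with the shell's own tty test:

    RUN_OPTS=""
    if [ -t 0 ]; then
        RUN_OPTS="-i -t"
    fi
    docker run $RUN_OPTS debian:wheezy echo success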
index 5e0fd76b04955b8fb94f23ed327a6ac4439872f1..77aeb6606b0a8339a225cda6bcbd1b4c3b3ceefd 100644 (file)
@@ -5,8 +5,8 @@ MAINTAINER Ward Vandewege <ward@curoverse.com>
 
 # Install packages and build the passenger apache module
 
-RUN apt-get update -qq
-RUN apt-get install -qqy \
+RUN apt-get update -q
+RUN apt-get install -qy \
         apt-utils git curl procps apache2-mpm-worker \
         libcurl4-openssl-dev apache2-threaded-dev \
         libapr1-dev libaprutil1-dev
index b6fed4e8c5a9a0c2bce7cac0c47ec28930fea923..a99a886e956eba25f5206c1e81c8bb8f410639ff 100644 (file)
@@ -1,7 +1,7 @@
 # PostgreSQL Docker container for Arvados.
 
 FROM arvados/debian:wheezy
-MAINTAINER Tim Pierce <twp@curoverse.com>
+MAINTAINER Ward Vandewege <ward@curoverse.com>
 
 # TODO(twp): parameterize variables via autoconf or similar.
 ENV POSTGRES_ROOT_PW   dummy_pw
@@ -19,7 +19,7 @@ ENV ARVADOS_PROD_USER  arvados
 ENV ARVADOS_PROD_PW    dummy_pw
 
 # Install postgres and apache
-RUN apt-get -q -y install procps postgresql postgresql-server-dev-9.1
+RUN apt-get install -qy procps postgresql postgresql-server-dev-9.1
 
 # Configure databases and users.
 ADD postgresql.conf /etc/postgresql/9.1/main/
index 8235159999934244e319db16c8b7e472f58c9390..8f0ed41afaefc9ae4aa86daf86097fbcdcad1712 100644 (file)
@@ -3,8 +3,8 @@
 FROM arvados/base
 MAINTAINER Ward Vandewege <ward@curoverse.com>
 
-RUN apt-get update -qq
-RUN apt-get install -qqy \
+RUN apt-get update -q
+RUN apt-get install -qy \
     python-pip python-pyvcf python-gflags python-google-api-python-client \
     python-virtualenv libattr1-dev libfuse-dev python-dev python-llfuse fuse \
     crunchstat python-arvados-fuse cron vim supervisor openssh-server
index 7e4284f67f3521254a593c13fe8e35feac3af709..28a3c3b24c6a47ea03aeb63b3f08aee92bddd483 100644 (file)
@@ -3,8 +3,8 @@
 FROM arvados/base
 MAINTAINER Ward Vandewege <ward@curoverse.com>
 
-RUN apt-get update -qq
-RUN apt-get install -qqy slurm-llnl munge
+RUN apt-get update -q
+RUN apt-get install -qy slurm-llnl munge
 
 ADD munge.key /etc/munge/
 RUN chown munge:munge /etc/munge/munge.key && chmod 600 /etc/munge/munge.key
index 99e3f4e41dfd8fbd925edecc4e78b966416506e3..7d99ac6e73c21dd06466ec764904ba443709605b 100644 (file)
@@ -7,8 +7,9 @@ RUN git clone git://github.com/curoverse/sso-devise-omniauth-provider.git /usr/s
     /usr/local/rvm/bin/rvm-exec default bundle install --gemfile=/usr/src/sso-provider/Gemfile
 
 # Install generated config files
-ADD generated/secret_token.rb /usr/src/sso-provider/config/initializers/secret_token.rb
 ADD generated/seeds.rb /usr/src/sso-provider/db/seeds.rb
+ADD generated/database.yml /usr/src/sso-provider/config/database.yml
+ADD generated/application.yml /usr/src/sso-provider/config/application.yml
 ADD generated/apache2_vhost /etc/apache2/sites-available/sso-provider
 
@@ -18,9 +19,10 @@ RUN a2dissite default && \
     a2enmod rewrite && \
     a2enmod ssl && \
     cd /usr/src/sso-provider && \
+    cp config/environments/production.rb.example config/environments/production.rb && \
     RAILS_ENV=production /usr/local/rvm/bin/rvm-exec default bundle exec rake db:setup && \
     /usr/local/rvm/bin/rvm-exec default bundle exec rake assets:precompile && \
-    chown www-data:www-data tmp_omniauth log config.ru -R && \
+    chown www-data:www-data log config.ru -R && \
     chown www-data:www-data db db/production.sqlite3 && \
     /bin/mkdir /var/run/apache2
 
index 554a86db073440460002f7876db44e3373100b9d..465a1e6e3a0da5f84dd8d39bf5b5963577684460 100644 (file)
@@ -42,8 +42,6 @@
     SSLEngine on
     # SSLCertificateChainFile /etc/ssl/certs/startcom.sub.class1.server.ca.pem
     # SSLCACertificateFile    /etc/ssl/certs/startcom.ca.pem
-    # SSLCertificateFile      /etc/ssl/certs/qr1hi.arvadosapi.com.crt.pem
-    # SSLCertificateKeyFile   /etc/ssl/private/qr1hi.arvadosapi.com.key.pem
     SSLCertificateFile    /etc/ssl/certs/ssl-cert-snakeoil.pem
     SSLCertificateKeyFile /etc/ssl/private/ssl-cert-snakeoil.key
     SetEnvIf User-Agent ".*MSIE.*" nokeepalive ssl-unclean-shutdown
diff --git a/docker/sso/application.yml.in b/docker/sso/application.yml.in
new file mode 100644 (file)
index 0000000..6063851
--- /dev/null
@@ -0,0 +1,30 @@
+#
+# Consult application.default.yml for the full list of configuration
+# settings.
+#
+# The order of precedence is:
+# 1. config/environments/{RAILS_ENV}.rb (deprecated)
+# 2. Section in application.yml corresponding to RAILS_ENV (e.g., development)
+# 3. Section in application.yml called "common"
+# 4. Section in application.default.yml corresponding to RAILS_ENV
+# 5. Section in application.default.yml called "common"
+
+production:
+
+  secret_token: @@SSO_SECRET@@
+  uuid_prefix: 'zzzzz'
+
+  # If true, allow creation of new accounts in the SSO server's internal
+  # user database.
+  allow_account_registration: true
+
+development:
+  # No development settings 
+
+test:
+  # No test settings 
+
+common:
+  # No common settings 
+
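As with the other *.in templates in this tree, the @@SSO_SECRET@@ placeholder is filled in when the generated/ copy is produced. The exact mechanism is not shown in this diff; a plausible sketch is a plain sed substitution:

    # Hypothetical rendering step; the real build script may differ.
    sed -e "s/@@SSO_SECRET@@/$SSO_SECRET/g" \
        docker/sso/application.yml.in > docker/sso/generated/application.yml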
diff --git a/docker/sso/database.yml.in b/docker/sso/database.yml.in
new file mode 100644 (file)
index 0000000..025d62a
--- /dev/null
@@ -0,0 +1,22 @@
+# SQLite version 3.x
+#   gem install sqlite3-ruby (not necessary on OS X Leopard)
+development:
+  adapter: sqlite3
+  database: db/development.sqlite3
+  pool: 5
+  timeout: 5000
+
+# Warning: The database defined as "test" will be erased and
+# re-generated from your development database when you run "rake".
+# Do not set this db to the same as development or production.
+test:
+  adapter: sqlite3
+  database: db/test.sqlite3
+  pool: 5
+  timeout: 5000
+
+production:
+  adapter: sqlite3
+  database: db/production.sqlite3
+  pool: 5
+  timeout: 5000
diff --git a/docker/sso/secret_token.rb.in b/docker/sso/secret_token.rb.in
deleted file mode 100644 (file)
index bbe3f85..0000000
+++ /dev/null
@@ -1,7 +0,0 @@
-# Be sure to restart your server when you modify this file.
-
-# Your secret key for verifying the integrity of signed cookies.
-# If you change this key, all old signed cookies will become invalid!
-# Make sure the secret is at least 30 characters and all random,
-# no regular words or you'll be exposed to dictionary attacks.
-CfiOauthProvider::Application.config.secret_token = '@@SSO_SECRET@@'
index 94d9f87765aeaf88abe53e9ff1c93e61421214ef..148153add20adc8b27138b67cb2f73a65e94867d 100644 (file)
@@ -4,8 +4,8 @@ FROM arvados/passenger
 MAINTAINER Ward Vandewege <ward@curoverse.com>
 
 # We need graphviz for the provenance graphs
-RUN apt-get update -qq
-RUN apt-get install -qqy graphviz
+RUN apt-get update -q
+RUN apt-get install -qy graphviz
 
 # Update Arvados source
 RUN /bin/mkdir -p /usr/src/arvados/apps
@@ -28,10 +28,10 @@ ADD generated/apache2_vhost /etc/apache2/sites-available/workbench
 RUN \
   a2dissite default && \
   a2ensite workbench && \
-  a2enmod rewrite
+  a2enmod rewrite && \
+  /bin/mkdir /var/run/apache2
 
 ADD apache2_foreground.sh /etc/apache2/foreground.sh
 
 # Start Apache
 CMD ["/etc/apache2/foreground.sh"]
-
index 5475ff0a199e9b29bc6891968fa2f30f32b85770..fc6028ea8391b3445e13adc5de4439fe7207ec2d 100755 (executable)
@@ -3,10 +3,5 @@
 read pid cmd state ppid pgrp session tty_nr tpgid rest < /proc/self/stat
 trap "kill -TERM -$pgrp; exit" EXIT TERM KILL SIGKILL SIGTERM SIGQUIT
 
-# Override the default API server address if necessary.
-if [[ "$API_PORT_443_TCP_ADDR" != "" ]]; then
-  sed -i "s/localhost:9900/$API_PORT_443_TCP_ADDR/" /usr/src/arvados/apps/workbench/config/application.yml
-fi
-
 source /etc/apache2/envvars
 /usr/sbin/apache2 -D FOREGROUND
index 05376ea709f8be4fd28b35ea25b87cfb1ce694c1..ba9e7f859572549d255b81a053caaf05195ebeb7 100644 (file)
@@ -1,6 +1,7 @@
+
 <VirtualHost *:80>
 
-  ServerName workbench.@@API_HOSTNAME@@.@@ARVADOS_DOMAIN@@
+  ServerName workbench.@@ARVADOS_DOMAIN@@
   ServerAdmin sysadmin@curoverse.com
 
   RailsEnv @@WORKBENCH_RAILS_MODE@@
     allow from all
   </Directory>
 
-</VirtualHost>
+  <IfModule mod_ssl.c>
+    SSLEngine off
+    # SSLCertificateChainFile /etc/ssl/certs/startcom.sub.class1.server.ca.pem
+    # SSLCACertificateFile    /etc/ssl/certs/startcom.ca.pem
+    SSLCertificateFile    /etc/ssl/certs/ssl-cert-snakeoil.pem
+    SSLCertificateKeyFile /etc/ssl/private/ssl-cert-snakeoil.key
+    SetEnvIf User-Agent ".*MSIE.*" nokeepalive ssl-unclean-shutdown
+  </IfModule>
 
+</VirtualHost>
index 31401885994f14b6c38f9cb7ce7e84bb7d3e8232..5e16928ffd61f3f43953e8a578e98928cfb291f3 100644 (file)
@@ -15,8 +15,8 @@ common:
   secret_token: @@WORKBENCH_SECRET@@
 
   # You probably also want to point to your API server.
-  arvados_login_base: 'https://localhost:9900/login'
-  arvados_v1_base: 'https://localhost:9900/arvados/v1'
+  arvados_login_base: 'https://api.@@ARVADOS_DOMAIN@@/login'
+  arvados_v1_base: 'https://api.@@ARVADOS_DOMAIN@@/arvados/v1'
   arvados_insecure_https: @@WORKBENCH_INSECURE_HTTPS@@
 
   data_import_dir: @@WORKBENCH_DATA_IMPORT_DIR@@
@@ -24,4 +24,6 @@ common:
 
   site_name: @@WORKBENCH_SITE_NAME@@
   activation_contact_link: @@WORKBENCH_ACTIVATION_CONTACT_LINK@@
+
+  arvados_docsite: http://doc.@@ARVADOS_DOMAIN@@
+  force_ssl: false
index 36ec037bd80702b27137cb07824ca21cda641d99..252d267c4e4319f3c20dc87f334bf2af402ba1a9 100755 (executable)
@@ -85,7 +85,15 @@ def init_config
 end
 
 
-subcommands = %w(copy create edit keep pipeline run tag ws)
+subcommands = %w(copy create edit keep pipeline run tag view ws)
+
+def exec_bin bin, opts
+  @bin_path = `which #{bin}`.strip
+  if @bin_path.empty?
+    raise "#{bin}: command not found"
+  end
+  exec @bin_path, *opts
+end
 
 def check_subcommands client, arvados, subcommand, global_opts, remaining_opts
   case subcommand
@@ -93,18 +101,20 @@ def check_subcommands client, arvados, subcommand, global_opts, remaining_opts
     arv_create client, arvados, global_opts, remaining_opts
   when 'edit'
     arv_edit client, arvados, global_opts, remaining_opts
+  when 'view'
+    arv_view client, arvados, global_opts, remaining_opts
   when 'copy', 'tag', 'ws', 'run'
-    exec `which arv-#{subcommand}`.strip, *remaining_opts
+    exec_bin "arv-#{subcommand}", remaining_opts
   when 'keep'
     @sub = remaining_opts.shift
     if ['get', 'put', 'ls', 'normalize'].index @sub then
       # Native Arvados
-      exec `which arv-#{@sub}`.strip, *remaining_opts
+      exec_bin "arv-#{@sub}", remaining_opts
     elsif ['less', 'check'].index @sub then
       # wh* shims
-      exec `which wh#{@sub}`.strip, *remaining_opts
+      exec_bin "wh#{@sub}", remaining_opts
     elsif @sub == 'docker'
-      exec `which arv-keepdocker`.strip, *remaining_opts
+      exec_bin "arv-keepdocker", remaining_opts
     else
       puts "Usage: arv keep [method] [--parameters]\n"
       puts "Use 'arv keep [method] --help' to get more information about specific methods.\n\n"
@@ -114,7 +124,7 @@ def check_subcommands client, arvados, subcommand, global_opts, remaining_opts
   when 'pipeline'
     sub = remaining_opts.shift
     if sub == 'run'
-      exec `which arv-run-pipeline-instance`.strip, *remaining_opts
+      exec_bin "arv-run-pipeline-instance", remaining_opts
     else
       puts "Usage: arv pipeline [method] [--parameters]\n"
       puts "Use 'arv pipeline [method] --help' to get more information about specific methods.\n\n"
@@ -150,14 +160,7 @@ end
 
 def edit_and_commit_object initial_obj, tmp_stem, global_opts, &block
 
-  content = case global_opts[:format]
-            when 'json'
-              Oj.dump(initial_obj, :indent => 1)
-            when 'yaml'
-              initial_obj.to_yaml
-            else
-              abort "Unrecognized format #{global_opts[:format]}"
-            end
+  content = get_obj_content initial_obj, global_opts
 
   tmp_file = Tempfile.new([tmp_stem, ".#{global_opts[:format]}"])
   tmp_file.write(content)
@@ -246,25 +249,7 @@ def check_response result
   results
 end
 
-def arv_edit client, arvados, global_opts, remaining_opts
-  uuid = remaining_opts.shift
-  if uuid.nil? or uuid == "-h" or uuid == "--help"
-    puts head_banner
-    puts "Usage: arv edit [uuid] [fields...]\n\n"
-    puts "Fetch the specified Arvados object, select the specified fields, \n"
-    puts "open an interactive text editor on a text representation (json or\n"
-    puts "yaml, use --format) and then update the object.  Will use 'nano'\n"
-    puts "by default, customize with the EDITOR or VISUAL environment variable.\n"
-    exit 255
-  end
-
-  if not $stdout.tty?
-    puts "Not connected to a TTY, cannot run interactive editor."
-    exit 1
-  end
-
-  # determine controller
-
+def lookup_uuid_rsc arvados, uuid
   m = /([a-z0-9]{5})-([a-z0-9]{5})-([a-z0-9]{15})/.match uuid
   if !m
     if /^[a-f0-9]{32}/.match uuid
@@ -287,6 +272,11 @@ def arv_edit client, arvados, global_opts, remaining_opts
     abort "Could not determine resource type #{m[2]}"
   end
 
+  return rsc
+end
+
+def fetch_rsc_obj client, arvados, rsc, uuid, remaining_opts
+
   begin
     result = client.execute(:api_method => eval('arvados.' + rsc + '.get'),
                             :parameters => {"uuid" => uuid},
@@ -294,15 +284,50 @@ def arv_edit client, arvados, global_opts, remaining_opts
                             :headers => {
                               authorization: 'OAuth2 '+ENV['ARVADOS_API_TOKEN']
                             })
-    oldobj = check_response result
+    obj = check_response result
   rescue => e
     abort "Server error: #{e}"
   end
 
   if remaining_opts.length > 0
-    oldobj.select! { |k, v| remaining_opts.include? k }
+    obj.select! { |k, v| remaining_opts.include? k }
   end
 
+  return obj
+end
+
+def get_obj_content obj, global_opts
+  content = case global_opts[:format]
+            when 'json'
+              Oj.dump(obj, :indent => 1)
+            when 'yaml'
+              obj.to_yaml
+            else
+              abort "Unrecognized format #{global_opts[:format]}"
+            end
+  return content
+end
+
+def arv_edit client, arvados, global_opts, remaining_opts
+  uuid = remaining_opts.shift
+  if uuid.nil? or uuid == "-h" or uuid == "--help"
+    puts head_banner
+    puts "Usage: arv edit [uuid] [fields...]\n\n"
+    puts "Fetch the specified Arvados object, select the specified fields, \n"
+    puts "open an interactive text editor on a text representation (json or\n"
+    puts "yaml, use --format) and then update the object.  Will use 'nano'\n"
+    puts "by default, customize with the EDITOR or VISUAL environment variable.\n"
+    exit 255
+  end
+
+  if not $stdout.tty?
+    puts "Not connected to a TTY, cannot run interactive editor."
+    exit 1
+  end
+
+  rsc = lookup_uuid_rsc arvados, uuid
+  oldobj = fetch_rsc_obj client, arvados, rsc, uuid, remaining_opts
+
   edit_and_commit_object oldobj, uuid, global_opts do |newobj|
     newobj.select! {|k| newobj[k] != oldobj[k]}
     if !newobj.empty?
@@ -323,6 +348,24 @@ def arv_edit client, arvados, global_opts, remaining_opts
   exit 0
 end
 
+def arv_view client, arvados, global_opts, remaining_opts
+  uuid = remaining_opts.shift
+  if uuid.nil? or uuid == "-h" or uuid == "--help"
+    puts head_banner
+    puts "Usage: arv view [uuid] [fields...]\n\n"
+    puts "Fetch the specified Arvados object, select the specified fields, \n"
+    puts "and print a text representation (json or yaml, use --format).\n"
+    exit 255
+  end
+
+  rsc = lookup_uuid_rsc arvados, uuid
+  obj = fetch_rsc_obj client, arvados, rsc, uuid, remaining_opts
+  content = get_obj_content obj, global_opts
+
+  puts content
+  exit 0
+end
+
 def arv_create client, arvados, global_opts, remaining_opts
   types = resource_types(arvados.discovery_document)
   create_opts = Trollop::options do
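With the new view subcommand, fetching and printing an object no longer requires a TTY or an editor, unlike edit. Typical invocations (the UUID and field names are illustrative):

    arv view zzzzz-4zz18-0123456789abcde
    arv --format yaml view zzzzz-4zz18-0123456789abcde uuid name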
index e400dab7dabdc656d6e552ed762194d0bc0b20fb..816c1e9320602a0959aeda033d764e9cabd631b6 100755 (executable)
@@ -17,11 +17,13 @@ def usage
 end
 
 def api_call(method, parameters:{}, request_body:{})
-  request_body[:api_token] = ENV['ARVADOS_API_TOKEN']
   result = $client.execute(:api_method => method,
                            :parameters => parameters,
                            :body_object => request_body,
-                           :authenticated => false)
+                           :authenticated => false,
+                           :headers => {
+                             authorization: "OAuth2 #{ENV['ARVADOS_API_TOKEN']}",
+                           })
 
   begin
     results = JSON.parse result.body
@@ -99,7 +101,8 @@ $arvados_api_host = ENV['ARVADOS_API_HOST'] or
   abort "#{$0}: fatal: ARVADOS_API_HOST environment variable not set."
 $arvados_api_token = ENV['ARVADOS_API_TOKEN'] or
   abort "#{$0}: fatal: ARVADOS_API_TOKEN environment variable not set."
-$arvados_api_host_insecure = ENV['ARVADOS_API_HOST_INSECURE'] == 'yes'
+$arvados_api_host_insecure = %w(1 true yes).
+  include?((ENV['ARVADOS_API_HOST_INSECURE'] || "").downcase)
 
 begin
   require 'rubygems'
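The insecure-host check is now case-insensitive and accepts the common truthy spellings, so any of these disables SSL certificate verification:

    export ARVADOS_API_HOST_INSECURE=1
    export ARVADOS_API_HOST_INSECURE=true
    export ARVADOS_API_HOST_INSECURE=yes   # previously the only accepted value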
index ea9a98717bb03085f7035f12206864f1a354582e..fc5005ab9cf089a94f0fd5d1f74ff5cfd7220f04 100755 (executable)
@@ -1,4 +1,4 @@
-#!/usr/bin/perl
+#!/usr/bin/env perl
 # -*- mode: perl; perl-indent-level: 2; indent-tabs-mode: nil; -*-
 
 =head1 NAME
@@ -98,6 +98,7 @@ use File::Path qw( make_path remove_tree );
 
 use constant TASK_TEMPFAIL => 111;
 use constant EX_TEMPFAIL => 75;
+use constant EX_RETRY_UNLOCKED => 93;
 
 $ENV{"TMPDIR"} ||= "/tmp";
 unless (defined $ENV{"CRUNCH_TMP"}) {
@@ -118,26 +119,28 @@ $ENV{"CRUNCH_INSTALL"} = "$ENV{CRUNCH_TMP}/opt";
 $ENV{"CRUNCH_WORK"} = $ENV{"JOB_WORK"}; # deprecated
 mkdir ($ENV{"JOB_WORK"});
 
+my %proc;
 my $force_unlock;
 my $git_dir;
 my $jobspec;
 my $job_api_token;
 my $no_clear_tmp;
 my $resume_stash;
+my $docker_bin = "docker.io";
 GetOptions('force-unlock' => \$force_unlock,
            'git-dir=s' => \$git_dir,
            'job=s' => \$jobspec,
            'job-api-token=s' => \$job_api_token,
            'no-clear-tmp' => \$no_clear_tmp,
            'resume-stash=s' => \$resume_stash,
+           'docker-bin=s' => \$docker_bin,
     );
 
 if (defined $job_api_token) {
   $ENV{ARVADOS_API_TOKEN} = $job_api_token;
 }
 
-my $have_slurm = exists $ENV{SLURM_JOBID} && exists $ENV{SLURM_NODELIST};
-my $local_job = 0;
+my $have_slurm = exists $ENV{SLURM_JOB_ID} && exists $ENV{SLURM_NODELIST};
 
 
 $SIG{'USR1'} = sub
@@ -149,8 +152,6 @@ $SIG{'USR2'} = sub
   $main::ENV{CRUNCH_DEBUG} = 0;
 };
 
-
-
 my $arv = Arvados->new('apiVersion' => 'v1');
 
 my $Job;
@@ -159,12 +160,40 @@ my $dbh;
 my $sth;
 my @jobstep;
 
-my $User = api_call("users/current");
-
+my $local_job;
 if ($jobspec =~ /^[-a-z\d]+$/)
 {
   # $jobspec is an Arvados UUID, not a JSON job specification
   $Job = api_call("jobs/get", uuid => $jobspec);
+  $local_job = 0;
+}
+else
+{
+  $local_job = JSON::decode_json($jobspec);
+}
+
+
+# Make sure our workers (our slurm nodes, localhost, or whatever) are
+# at least able to run basic commands: they aren't down or severely
+# misconfigured.
+my $cmd = ['true'];
+if (($Job || $local_job)->{docker_image_locator}) {
+  $cmd = [$docker_bin, 'ps', '-q'];
+}
+Log(undef, "Sanity check is `@$cmd`");
+srun(["srun", "--nodes=\Q$ENV{SLURM_NNODES}\E", "--ntasks-per-node=1"],
+     $cmd,
+     {fork => 1});
+if ($? != 0) {
+  Log(undef, "Sanity check failed: ".exit_status_s($?));
+  exit EX_TEMPFAIL;
+}
+Log(undef, "Sanity check OK");
+
+
+my $User = api_call("users/current");
+
+if (!$local_job) {
   if (!$force_unlock) {
     # Claim this job, and make sure nobody else does
     eval { api_call("jobs/lock", uuid => $Job->{uuid}); };
@@ -176,19 +205,17 @@ if ($jobspec =~ /^[-a-z\d]+$/)
 }
 else
 {
-  $Job = JSON::decode_json($jobspec);
-
   if (!$resume_stash)
   {
-    map { croak ("No $_ specified") unless $Job->{$_} }
+    map { croak ("No $_ specified") unless $local_job->{$_} }
     qw(script script_version script_parameters);
   }
 
-  $Job->{'is_locked_by_uuid'} = $User->{'uuid'};
-  $Job->{'started_at'} = gmtime;
-  $Job->{'state'} = 'Running';
+  $local_job->{'is_locked_by_uuid'} = $User->{'uuid'};
+  $local_job->{'started_at'} = gmtime;
+  $local_job->{'state'} = 'Running';
 
-  $Job = api_call("jobs/create", job => $Job);
+  $Job = api_call("jobs/create", job => $local_job);
 }
 $job_id = $Job->{'uuid'};
 
@@ -265,9 +292,16 @@ foreach (@sinfo)
   {
     Log (undef, "node $nodename - $ncpus slots");
     my $node = { name => $nodename,
-                ncpus => $ncpus,
-                losing_streak => 0,
-                hold_until => 0 };
+                 ncpus => $ncpus,
+                 # The number of consecutive times a task has been dispatched
+                 # to this node and failed.
+                 losing_streak => 0,
+                 # The number of consecutive times that SLURM has reported
+                 # a node failure since the last successful task.
+                 fail_count => 0,
+                 # Don't dispatch work to this node until this time
+                 # (in seconds since the epoch) has passed.
+                 hold_until => 0 };
     foreach my $cpu (1..$ncpus)
     {
       push @slot, { node => $node,
@@ -315,8 +349,7 @@ my @jobstep_todo = ();
 my @jobstep_done = ();
 my @jobstep_tomerge = ();
 my $jobstep_tomerge_level = 0;
-my $squeue_checked;
-my $squeue_kill_checked;
+my $squeue_checked = 0;
 my $latest_refresh = scalar time;
 
 
@@ -362,7 +395,7 @@ if (!defined $no_clear_tmp) {
     # TODO: When #5036 is done and widely deployed, we can get rid of the
     # regular expression and just unmount everything with type fuse.keep.
     srun (["srun", "--nodelist=$nodelist", "-D", $ENV{'TMPDIR'}],
-          ['bash', '-ec', 'mount -t fuse,fuse.keep | awk \'($3 ~ /\ykeep\y/){print $3}\' | xargs -r -n 1 fusermount -u -z; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src* $CRUNCH_TMP/*.cid']);
+          ['bash', '-ec', '-o', 'pipefail', 'mount -t fuse,fuse.keep | awk \'($3 ~ /\ykeep\y/){print $3}\' | xargs -r -n 1 fusermount -u -z; sleep 1; rm -rf $JOB_WORK $CRUNCH_INSTALL $CRUNCH_TMP/task $CRUNCH_TMP/src* $CRUNCH_TMP/*.cid']);
     exit (1);
   }
   while (1)
@@ -371,12 +404,14 @@ if (!defined $no_clear_tmp) {
     freeze_if_want_freeze ($cleanpid);
     select (undef, undef, undef, 0.1);
   }
-  Log (undef, "Cleanup command exited ".exit_status_s($?));
+  if ($?) {
+    Log(undef, "Clean work dirs: exit ".exit_status_s($?));
+    exit(EX_RETRY_UNLOCKED);
+  }
 }
 
 # If this job requires a Docker image, install that.
-my $docker_bin = "/usr/bin/docker.io";
-my ($docker_locator, $docker_stream, $docker_hash);
+my ($docker_locator, $docker_stream, $docker_hash, $docker_limitmem);
 if ($docker_locator = $Job->{docker_image_locator}) {
   ($docker_stream, $docker_hash) = find_docker_image($docker_locator);
   if (!$docker_hash)
@@ -408,6 +443,12 @@ fi
           .exit_status_s($?));
   }
 
+  # Determine whether this version of Docker supports memory+swap limits.
+  srun(["srun", "--nodelist=" . $node[0]],
+       ["/bin/sh", "-ec", "$docker_bin run --help | grep -qe --memory-swap="],
+      {fork => 1});
+  $docker_limitmem = ($? == 0);
+
   if ($Job->{arvados_sdk_version}) {
     # The job also specifies an Arvados SDK version.  Add the SDKs to the
     # tar file for the build script to install.
@@ -557,7 +598,7 @@ else {
   unless ($? == 0 && $sha1 =~ /^([0-9a-f]{40})$/) {
     croak("`$gitcmd rev-list` exited "
           .exit_status_s($?)
-          .", '$treeish' not found. Giving up.");
+          .", '$treeish' not found, giving up");
   }
   $commit = $1;
   Log(undef, "Version $treeish is commit $commit");
@@ -583,32 +624,89 @@ if (!defined $git_archive) {
   }
 }
 else {
-  Log(undef, "Run install script on all workers");
-
-  my @srunargs = ("srun",
-                  "--nodelist=$nodelist",
-                  "-D", $ENV{'TMPDIR'}, "--job-name=$job_id");
-  my @execargs = ("sh", "-c",
-                  "mkdir -p $ENV{CRUNCH_INSTALL} && cd $ENV{CRUNCH_TMP} && perl -");
+  my $install_exited;
+  my $install_script_tries_left = 3;
+  for (my $attempts = 0; $attempts < 3; $attempts++) {
+    Log(undef, "Run install script on all workers");
+
+    my @srunargs = ("srun",
+                    "--nodelist=$nodelist",
+                    "-D", $ENV{'TMPDIR'}, "--job-name=$job_id");
+    my @execargs = ("sh", "-c",
+                    "mkdir -p $ENV{CRUNCH_INSTALL} && cd $ENV{CRUNCH_TMP} && perl -");
+
+    $ENV{"CRUNCH_GIT_ARCHIVE_HASH"} = md5_hex($git_archive);
+    my ($install_stderr_r, $install_stderr_w);
+    pipe $install_stderr_r, $install_stderr_w or croak("pipe() failed: $!");
+    set_nonblocking($install_stderr_r);
+    my $installpid = fork();
+    if ($installpid == 0)
+    {
+      close($install_stderr_r);
+      fcntl($install_stderr_w, F_SETFL, 0) or croak($!); # no close-on-exec
+      open(STDOUT, ">&", $install_stderr_w);
+      open(STDERR, ">&", $install_stderr_w);
+      srun (\@srunargs, \@execargs, {}, $build_script . $git_archive);
+      exit (1);
+    }
+    close($install_stderr_w);
+    # Tell freeze_if_want_freeze how to kill the child, otherwise the
+    # "waitpid(installpid)" loop won't get interrupted by a freeze:
+    $proc{$installpid} = {};
+    my $stderr_buf = '';
+    # Track whether anything appears on stderr other than slurm errors
+    # ("srun: ...") and the "starting: ..." message printed by the
+    # srun subroutine itself:
+    my $stderr_anything_from_script = 0;
+    my $match_our_own_errors = '^(srun: error: |starting: \[)';
+    while ($installpid != waitpid(-1, WNOHANG)) {
+      freeze_if_want_freeze ($installpid);
+      # Wait up to 0.1 seconds for something to appear on stderr, then
+      # do a non-blocking read.
+      my $bits = fhbits($install_stderr_r);
+      select ($bits, undef, $bits, 0.1);
+      if (0 < sysread ($install_stderr_r, $stderr_buf, 8192, length($stderr_buf)))
+      {
+        while ($stderr_buf =~ /^(.*?)\n/) {
+          my $line = $1;
+          substr $stderr_buf, 0, 1+length($line), "";
+          Log(undef, "stderr $line");
+          if ($line !~ /$match_our_own_errors/) {
+            $stderr_anything_from_script = 1;
+          }
+        }
+      }
+    }
+    delete $proc{$installpid};
+    $install_exited = $?;
+    close($install_stderr_r);
+    if (length($stderr_buf) > 0) {
+      if ($stderr_buf !~ /$match_our_own_errors/) {
+        $stderr_anything_from_script = 1;
+      }
+      Log(undef, "stderr $stderr_buf")
+    }
 
-  my $installpid = fork();
-  if ($installpid == 0)
-  {
-    srun (\@srunargs, \@execargs, {}, $build_script . $git_archive);
-    exit (1);
+    Log (undef, "Install script exited ".exit_status_s($install_exited));
+    last if $install_exited == 0 || $main::please_freeze;
+    # If the install script fails but doesn't print an error message,
+    # the next thing anyone is likely to do is just run it again in
+    # case it was a transient problem like "slurm communication fails
+    # because the network isn't reliable enough". So we'll just do
+    # that ourselves (up to 3 attempts in total). OTOH, if there is an
+    # error message, the problem is more likely to have a real fix and
+    # we should fail the job so the fixing process can start, instead
+    # of doing 2 more attempts.
+    last if $stderr_anything_from_script;
   }
-  while (1)
-  {
-    last if $installpid == waitpid (-1, WNOHANG);
-    freeze_if_want_freeze ($installpid);
-    select (undef, undef, undef, 0.1);
-  }
-  my $install_exited = $?;
-  Log (undef, "Install script exited ".exit_status_s($install_exited));
+
   foreach my $tar_filename (map { tar_filename_n($_); } (1..$git_tar_count)) {
     unlink($tar_filename);
   }
-  exit (1) if $install_exited != 0;
+
+  if ($install_exited != 0) {
+    croak("Giving up");
+  }
 }
 
 foreach (qw (script script_version script_parameters runtime_constraints))
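The retry policy above can be summarized as: rerun the install script up to three times, but only when it failed silently (nothing on stderr beyond slurm noise and the srun banner); a real error message fails the job immediately instead. A shell sketch of the same policy, where run_install is a hypothetical stand-in for the srun pipeline:

    for attempt in 1 2 3; do
        run_install 2>stderr.log && break
        # Stop retrying if stderr holds anything beyond our own noise.
        grep -qvE '^(srun: error: |starting: \[)' stderr.log && break
    done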
@@ -633,16 +731,48 @@ ONELEVEL:
 my $thisround_succeeded = 0;
 my $thisround_failed = 0;
 my $thisround_failed_multiple = 0;
+my $working_slot_count = scalar(@slot);
 
 @jobstep_todo = sort { $jobstep[$a]->{level} <=> $jobstep[$b]->{level}
                       or $a <=> $b } @jobstep_todo;
 my $level = $jobstep[$jobstep_todo[0]]->{level};
-Log (undef, "start level $level");
 
+my $initial_tasks_this_level = 0;
+foreach my $id (@jobstep_todo) {
+  $initial_tasks_this_level++ if ($jobstep[$id]->{level} == $level);
+}
+
+# If the number of tasks scheduled at this level #T is smaller than the number
+# of slots available #S, only use the first #T slots, or the first slot on
+# each node, whichever number is greater.
+#
+# When we dispatch tasks later, we'll allocate whole-node resources like RAM
+# based on these numbers.  Using fewer slots makes more resources available
+# to each individual task, which should normally be a better strategy when
+# there are fewer of them running with less parallelism.
+#
+# Note that this calculation is not redone if the initial tasks at
+# this level queue more tasks at the same level.  This may harm
+# overall task throughput for that level.
+my @freeslot;
+if ($initial_tasks_this_level < @node) {
+  @freeslot = (0..$#node);
+} elsif ($initial_tasks_this_level < @slot) {
+  @freeslot = (0..$initial_tasks_this_level - 1);
+} else {
+  @freeslot = (0..$#slot);
+}
+my $round_num_freeslots = scalar(@freeslot);
 
+my %round_max_slots = ();
+for (my $ii = $#freeslot; $ii >= 0; $ii--) {
+  my $this_slot = $slot[$freeslot[$ii]];
+  my $node_name = $this_slot->{node}->{name};
+  $round_max_slots{$node_name} ||= $this_slot->{cpu};
+  last if (scalar(keys(%round_max_slots)) >= @node);
+}
 
-my %proc;
-my @freeslot = (0..$#slot);
+Log(undef, "start level $level with $round_num_freeslots slots");
 my @holdslot;
 my %reader;
 my $progress_is_dirty = 1;
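A worked example of the slot-throttling rule above, with illustrative numbers (4 nodes, 8 CPUs each, so 32 slots):

    #  2 tasks at this level:  2 < 4 nodes    -> first slot on each node (4 slots)
    #  6 tasks at this level:  6 < 32 slots   -> slots 0..5
    # 40 tasks at this level: 40 >= 32 slots  -> all 32 slots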
@@ -651,7 +781,6 @@ my $progress_stats_updated = 0;
 update_progress_stats();
 
 
-
 THISROUND:
 for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
 {
@@ -662,9 +791,8 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
     next;
   }
 
-  pipe $reader{$id}, "writer" or croak ($!);
-  my $flags = fcntl ($reader{$id}, F_GETFL, 0) or croak ($!);
-  fcntl ($reader{$id}, F_SETFL, $flags | O_NONBLOCK) or croak ($!);
+  pipe $reader{$id}, "writer" or croak("pipe() failed: $!");
+  set_nonblocking($reader{$id});
 
   my $childslot = $freeslot[0];
   my $childnode = $slot[$childslot]->{node};
@@ -705,7 +833,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
     $ENV{"HOME"} = $ENV{"TASK_WORK"};
     $ENV{"TASK_KEEPMOUNT"} = $ENV{"TASK_WORK"}.".keep";
     $ENV{"TASK_TMPDIR"} = $ENV{"TASK_WORK"}; # deprecated
-    $ENV{"CRUNCH_NODE_SLOTS"} = $slot[$childslot]->{node}->{ncpus};
+    $ENV{"CRUNCH_NODE_SLOTS"} = $round_max_slots{$ENV{TASK_SLOT_NODE}};
     $ENV{"PATH"} = $ENV{"CRUNCH_INSTALL"} . "/bin:" . $ENV{"PATH"};
 
     $ENV{"GZIP"} = "-n";
@@ -720,21 +848,25 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
        "if [ -e $ENV{TASK_WORK} ]; then rm -rf $ENV{TASK_WORK}; fi; "
         ."mkdir -p $ENV{CRUNCH_TMP} $ENV{JOB_WORK} $ENV{TASK_WORK} $ENV{TASK_KEEPMOUNT} "
        ."&& cd $ENV{CRUNCH_TMP} "
-        ."&& MEM=\$(cat /proc/meminfo | grep MemTotal | sed 's/\\s\\s*/ /g' |cut -d' ' -f2) "
-        ."&& MEMLIMIT=\$(( (\$MEM * 95) / ($ENV{CRUNCH_NODE_SLOTS} * 100) )) ";
+        # These environment variables get used explicitly later in
+        # $command.  No tool is expected to read these values directly.
+        .q{&& MEM=$(awk '($1 == "MemTotal:"){print $2}' </proc/meminfo) }
+        .q{&& SWAP=$(awk '($1 == "SwapTotal:"){print $2}' </proc/meminfo) }
+        ."&& MEMLIMIT=\$(( (\$MEM * 95) / ($ENV{CRUNCH_NODE_SLOTS} * 100) )) "
+        ."&& let SWAPLIMIT=\$MEMLIMIT+\$SWAP ";
     $command .= "&& exec arv-mount --by-id --allow-other $ENV{TASK_KEEPMOUNT} --exec ";
     if ($docker_hash)
     {
       my $cidfile = "$ENV{CRUNCH_TMP}/$Jobstep->{arvados_task}->{uuid}-$Jobstep->{failures}.cid";
       $command .= "crunchstat -cgroup-root=/sys/fs/cgroup -cgroup-parent=docker -cgroup-cid=$cidfile -poll=10000 ";
-      $command .= "$docker_bin run --rm=true --attach=stdout --attach=stderr --attach=stdin -i --user=crunch --cidfile=$cidfile --sig-proxy --memory=\${MEMLIMIT}k ";
-
-      # Dynamically configure the container to use the host system as its
-      # DNS server.  Get the host's global addresses from the ip command,
-      # and turn them into docker --dns options using gawk.
-      $command .=
-          q{$(ip -o address show scope global |
-              gawk 'match($4, /^([0-9\.:]+)\//, x){print "--dns", x[1]}') };
+      $command .= "$docker_bin run --rm=true --attach=stdout --attach=stderr --attach=stdin -i --user=crunch --cidfile=$cidfile --sig-proxy ";
+      # We only set memory limits if Docker lets us limit both memory and swap.
+      # Memory limits alone have been supported longer, but subprocesses tend
+      # to get SIGKILL if they exceed that without any swap limit set.
+      # See #5642 for additional background.
+      if ($docker_limitmem) {
+        $command .= "--memory=\${MEMLIMIT}k --memory-swap=\${SWAPLIMIT}k ";
+      }
 
       # The source tree and $destdir directory (which we have
       # installed on the worker host) are available in the container,
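With $docker_limitmem set, each task container gets both a memory cap and a memory+swap cap derived from the node's totals and the slot count. The arithmetic, sketched in shell for a node assumed to run 8 task slots:

    MEM=$(awk '($1 == "MemTotal:"){print $2}' </proc/meminfo)
    SWAP=$(awk '($1 == "SwapTotal:"){print $2}' </proc/meminfo)
    MEMLIMIT=$(( (MEM * 95) / (8 * 100) ))   # 95% of RAM split across slots
    SWAPLIMIT=$(( MEMLIMIT + SWAP ))
    echo "--memory=${MEMLIMIT}k --memory-swap=${SWAPLIMIT}k"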
@@ -822,6 +954,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
   $Jobstep->{slotindex} = $childslot;
   delete $Jobstep->{stderr};
   delete $Jobstep->{finishtime};
+  delete $Jobstep->{tempfail};
 
   $Jobstep->{'arvados_task'}->{started_at} = strftime "%Y-%m-%dT%H:%M:%SZ", gmtime($Jobstep->{starttime});
   $Jobstep->{'arvados_task'}->save;
@@ -833,7 +966,7 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
 
   while (!@freeslot
         ||
-        (@slot > @freeslot && $todo_ptr+1 > $#jobstep_todo))
+        ($round_num_freeslots > @freeslot && $todo_ptr+1 > $#jobstep_todo))
   {
     last THISROUND if $main::please_freeze || defined($main::success);
     if ($main::please_info)
@@ -854,10 +987,12 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
       update_progress_stats();
       select (undef, undef, undef, 0.1);
     }
-    elsif (time - $progress_stats_updated >= 30)
+    elsif (time - $progress_stats_updated >= 30 || $progress_is_dirty)
     {
       update_progress_stats();
     }
+    $working_slot_count = scalar(grep { $_->{node}->{fail_count} == 0 &&
+                                        $_->{node}->{hold_count} < 4 } @slot);
     if (($thisround_failed_multiple >= 8 && $thisround_succeeded == 0) ||
        ($thisround_failed_multiple >= 16 && $thisround_failed_multiple > $thisround_succeeded))
     {
@@ -881,10 +1016,8 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
     }
 
     # give up if no nodes are succeeding
-    if (!grep { $_->{node}->{losing_streak} == 0 &&
-                    $_->{node}->{hold_count} < 4 } @slot) {
-      my $message = "Every node has failed -- giving up on this round";
-      Log (undef, $message);
+    if ($working_slot_count < 1) {
+      Log(undef, "Every node has failed -- giving up");
       last THISROUND;
     }
   }
@@ -920,18 +1053,18 @@ freeze_if_want_freeze();
 
 if (!defined $main::success)
 {
-  if (@jobstep_todo &&
-      $thisround_succeeded == 0 &&
-      ($thisround_failed == 0 || $thisround_failed > 4))
-  {
+  if (!@jobstep_todo) {
+    $main::success = 1;
+  } elsif ($working_slot_count < 1) {
+    save_output_collection();
+    save_meta();
+    exit(EX_RETRY_UNLOCKED);
+  } elsif ($thisround_succeeded == 0 &&
+           ($thisround_failed == 0 || $thisround_failed > 4)) {
     my $message = "stop because $thisround_failed tasks failed and none succeeded";
     Log (undef, $message);
     $main::success = 0;
   }
-  if (!@jobstep_todo)
-  {
-    $main::success = 1;
-  }
 }
 
 goto ONELEVEL if !defined $main::success;
@@ -939,16 +1072,7 @@ goto ONELEVEL if !defined $main::success;
 
 release_allocation();
 freeze();
-my $collated_output = &create_output_collection();
-
-if (!$collated_output) {
-  Log (undef, "Failed to write output collection");
-}
-else {
-  Log(undef, "job output $collated_output");
-  $Job->update_attributes('output' => $collated_output);
-}
-
+my $collated_output = save_output_collection();
 Log (undef, "finish");
 
 save_meta();
@@ -970,8 +1094,8 @@ sub update_progress_stats
   $progress_stats_updated = time;
   return if !$progress_is_dirty;
   my ($todo, $done, $running) = (scalar @jobstep_todo,
-                                scalar @jobstep_done,
-                                scalar @slot - scalar @freeslot - scalar @holdslot);
+                                 scalar @jobstep_done,
+                                 scalar keys(%proc));
   $Job->{'tasks_summary'} ||= {};
   $Job->{'tasks_summary'}->{'todo'} = $todo;
   $Job->{'tasks_summary'}->{'done'} = $done;
@@ -1013,7 +1137,7 @@ sub reapchildren
   if (!$task_success)
   {
     my $temporary_fail;
-    $temporary_fail ||= $Jobstep->{node_fail};
+    $temporary_fail ||= $Jobstep->{tempfail};
     $temporary_fail ||= ($exitvalue == TASK_TEMPFAIL);
 
     ++$thisround_failed;
@@ -1035,7 +1159,7 @@ sub reapchildren
 
     Log ($jobstepid, sprintf('failure (#%d, %s) after %d seconds',
                              ++$Jobstep->{'failures'},
-                             $temporary_fail ? 'temporary ' : 'permanent',
+                             $temporary_fail ? 'temporary' : 'permanent',
                              $elapsed));
 
     if (!$temporary_fail || $Jobstep->{'failures'} >= 3) {
@@ -1051,6 +1175,7 @@ sub reapchildren
     ++$thisround_succeeded;
     $slot[$proc{$pid}->{slot}]->{node}->{losing_streak} = 0;
     $slot[$proc{$pid}->{slot}]->{node}->{hold_until} = 0;
+    $slot[$proc{$pid}->{slot}]->{node}->{fail_count} = 0;
     push @jobstep_done, $jobstepid;
     Log ($jobstepid, "success in $elapsed seconds");
   }
@@ -1125,29 +1250,45 @@ sub check_refresh_wanted
 
 sub check_squeue
 {
-  # return if the kill list was checked <4 seconds ago
-  if (defined $squeue_kill_checked && $squeue_kill_checked > time - 4)
-  {
-    return;
-  }
-  $squeue_kill_checked = time;
+  my $last_squeue_check = $squeue_checked;
 
-  # use killem() on procs whose killtime is reached
-  for (keys %proc)
+  # Do not call `squeue` or check the kill list more than once every
+  # 15 seconds.
+  return if $last_squeue_check > time - 15;
+  $squeue_checked = time;
+
+  # Look for children from which we haven't received stderr data since
+  # the last squeue check. If no such children exist, all procs are
+  # alive and there's no need to even look at squeue.
+  #
+  # As long as the crunchstat poll interval (10s) is shorter than the
+  # squeue check interval (15s) this should make the squeue check an
+  # infrequent event.
+  my $silent_procs = 0;
+  for my $jobstep (values %proc)
   {
-    if (exists $proc{$_}->{killtime}
-       && $proc{$_}->{killtime} <= time)
+    if ($jobstep->{stderr_at} < $last_squeue_check)
     {
-      killem ($_);
+      $silent_procs++;
     }
   }
+  return if $silent_procs == 0;
 
-  # return if the squeue was checked <60 seconds ago
-  if (defined $squeue_checked && $squeue_checked > time - 60)
+  # use killem() on procs whose killtime is reached
+  while (my ($pid, $jobstep) = each %proc)
   {
-    return;
+    if (exists $jobstep->{killtime}
+        && $jobstep->{killtime} <= time
+        && $jobstep->{stderr_at} < $last_squeue_check)
+    {
+      my $sincewhen = "";
+      if ($jobstep->{stderr_at}) {
+        $sincewhen = " in last " . (time - $jobstep->{stderr_at}) . "s";
+      }
+      Log($jobstep->{jobstep}, "killing orphaned srun process $pid (task not in slurm queue, no stderr received$sincewhen)");
+      killem ($pid);
+    }
   }
-  $squeue_checked = time;
 
   if (!$have_slurm)
   {
@@ -1155,37 +1296,46 @@ sub check_squeue
     return;
   }
 
-  # get a list of steps still running
-  my @squeue = `squeue -s -h -o '%i %j' && echo ok`;
-  chop @squeue;
-  if ($squeue[-1] ne "ok")
+  # Get a list of steps still running.  Note: squeue(1) says --steps
+  # selects a format (which we override anyway) and allows us to
+  # specify which steps we're interested in (which we don't).
+  # Importantly, it also changes the meaning of %j from "job name" to
+  # "step name" and (although this isn't mentioned explicitly in the
+  # docs) switches from "one line per job" mode to "one line per step"
+  # mode. Without it, we'd just get a list of one job, instead of a
+  # list of N steps.
+  my @squeue = `squeue --jobs=\Q$ENV{SLURM_JOB_ID}\E --steps --format='%j' --noheader`;
+  if ($? != 0)
   {
+    Log(undef, "warning: squeue exit status $? ($!)");
     return;
   }
-  pop @squeue;
+  chop @squeue;
 
   # which of my jobsteps are running, according to squeue?
   my %ok;
-  foreach (@squeue)
+  for my $jobstepname (@squeue)
   {
-    if (/^(\d+)\.(\d+) (\S+)/)
-    {
-      if ($1 eq $ENV{SLURM_JOBID})
-      {
-       $ok{$3} = 1;
-      }
-    }
+    $ok{$jobstepname} = 1;
   }
 
-  # which of my active child procs (>60s old) were not mentioned by squeue?
-  foreach (keys %proc)
+  # Check for child procs >60s old and not mentioned by squeue.
+  while (my ($pid, $jobstep) = each %proc)
   {
-    if ($proc{$_}->{time} < time - 60
-       && !exists $ok{$proc{$_}->{jobstepname}}
-       && !exists $proc{$_}->{killtime})
+    if ($jobstep->{time} < time - 60
+        && $jobstep->{jobstepname}
+        && !exists $ok{$jobstep->{jobstepname}}
+        && !exists $jobstep->{killtime})
     {
-      # kill this proc if it hasn't exited in 30 seconds
-      $proc{$_}->{killtime} = time + 30;
+      # According to slurm, this task has ended (successfully or not)
+      # -- but our srun child hasn't exited. First we must wait (30
+      # seconds) in case this is just a race between communication
+      # channels. Then, if our srun child process still hasn't
+      # terminated, we'll conclude some slurm communication
+      # error/delay has caused the task to die without notifying srun,
+      # and we'll kill srun ourselves.
+      $jobstep->{killtime} = time + 30;
+      Log($jobstep->{jobstep}, "notice: task is not in slurm queue but srun process $pid has not exited");
     }
   }
 }
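The orphan detection above hinges on a single squeue invocation that lists step names, one per line, for this job only. Since --steps changes the meaning of %j, the command is worth seeing on its own:

    squeue --jobs="$SLURM_JOB_ID" --steps --format='%j' --noheader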
@@ -1196,7 +1346,7 @@ sub release_allocation
   if ($have_slurm)
   {
     Log (undef, "release job allocation");
-    system "scancel $ENV{SLURM_JOBID}";
+    system "scancel $ENV{SLURM_JOB_ID}";
   }
 }
 
@@ -1210,6 +1360,7 @@ sub readfrompipes
     while (0 < sysread ($reader{$job}, $buf, 8192))
     {
       print STDERR $buf if $ENV{CRUNCH_DEBUG};
+      $jobstep[$job]->{stderr_at} = time;
       $jobstep[$job]->{stderr} .= $buf;
       preprocess_stderr ($job);
       if (length ($jobstep[$job]->{stderr}) > 16384)
@@ -1235,10 +1386,19 @@ sub preprocess_stderr
       # whoa.
       $main::please_freeze = 1;
     }
-    elsif ($line =~ /srun: error: (Node failure on|Unable to create job step|.*: Communication connection failure)/) {
-      $jobstep[$job]->{node_fail} = 1;
+    elsif ($line =~ /srun: error: Node failure on/) {
+      my $job_slot_index = $jobstep[$job]->{slotindex};
+      $slot[$job_slot_index]->{node}->{fail_count}++;
+      $jobstep[$job]->{tempfail} = 1;
+      ban_node_by_slot($job_slot_index);
+    }
+    elsif ($line =~ /srun: error: (Unable to create job step|.*: Communication connection failure)/) {
+      $jobstep[$job]->{tempfail} = 1;
       ban_node_by_slot($jobstep[$job]->{slotindex});
     }
+    elsif ($line =~ /arvados\.errors\.Keep/) {
+      $jobstep[$job]->{tempfail} = 1;
+    }
   }
 }
 
@@ -1357,6 +1517,20 @@ print (arvados.api("v1").collections().
   return $joboutput;
 }
 
+# Calls create_output_collection, logs the result, and returns it.
+# If that was successful, save that as the output in the job record.
+sub save_output_collection {
+  my $collated_output = create_output_collection();
+
+  if (!$collated_output) {
+    Log(undef, "Failed to write output collection");
+  }
+  else {
+    Log(undef, "job output $collated_output");
+    $Job->update_attributes('output' => $collated_output);
+  }
+  return $collated_output;
+}
 
 sub killem
 {
@@ -1402,6 +1576,8 @@ sub fhbits
 # Send log output to Keep via arv-put.
 #
 # $log_pipe_in and $log_pipe_out are the input and output filehandles to the arv-put pipe.
+# $log_pipe_out_buf is a string containing all output read from arv-put so far.
+# $log_pipe_out_select is an IO::Select object around $log_pipe_out.
 # $log_pipe_pid is the pid of the arv-put subprocess.
 #
 # The only functions that should access these variables directly are:
@@ -1410,6 +1586,13 @@ sub fhbits
 #     Starts an arv-put pipe, reading data on stdin and writing it to
 #     a $logfilename file in an output collection.
 #
+# log_writer_read_output([$timeout])
+#     Read output from $log_pipe_out and append it to $log_pipe_out_buf.
+#     Passes $timeout to the select() call, with a default of 0.01.
+#     Returns the result of the last read() call on $log_pipe_out, or
+#     -1 if read() wasn't called because select() timed out.
+#     Only other log_writer_* functions should need to call this.
+#
 # log_writer_send($txt)
 #     Writes $txt to the output log collection.
 #
@@ -1420,25 +1603,40 @@ sub fhbits
 #     Returns a true value if there is currently a live arv-put
 #     process, false otherwise.
 #
-my ($log_pipe_in, $log_pipe_out, $log_pipe_pid);
+my ($log_pipe_in, $log_pipe_out, $log_pipe_out_buf, $log_pipe_out_select,
+    $log_pipe_pid);
 
 sub log_writer_start($)
 {
   my $logfilename = shift;
   $log_pipe_pid = open2($log_pipe_out, $log_pipe_in,
                         'arv-put',
-                        '--portable-data-hash',
-                        '--project-uuid', $Job->{owner_uuid},
+                        '--stream',
                         '--retries', '3',
-                        '--name', $logfilename,
                         '--filename', $logfilename,
                         '-');
+  $log_pipe_out_buf = "";
+  $log_pipe_out_select = IO::Select->new($log_pipe_out);
+}
+
+sub log_writer_read_output {
+  my $timeout = shift || 0.01;
+  my $read = -1;
+  while ($read && $log_pipe_out_select->can_read($timeout)) {
+    $read = read($log_pipe_out, $log_pipe_out_buf, 65536,
+                 length($log_pipe_out_buf));
+  }
+  if (!defined($read)) {
+    Log(undef, "error reading log manifest from arv-put: $!");
+  }
+  return $read;
 }
 
 sub log_writer_send($)
 {
   my $txt = shift;
   print $log_pipe_in $txt;
+  log_writer_read_output();
 }
 
 sub log_writer_finish()
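The log writer now drives arv-put in stream mode: log text goes in on stdin and manifest text comes back on stdout, which log_writer_read_output drains incrementally so the pipe never fills. Stripped of the plumbing, the underlying pipeline is roughly (filename illustrative):

    echo "one log line" | arv-put --stream --retries 3 --filename crunch.log -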
@@ -1446,22 +1644,24 @@ sub log_writer_finish()
   return unless $log_pipe_pid;
 
   close($log_pipe_in);
-  my $arv_put_output;
 
-  my $s = IO::Select->new($log_pipe_out);
-  if ($s->can_read(120)) {
-    sysread($log_pipe_out, $arv_put_output, 1024);
-    chomp($arv_put_output);
-  } else {
+  my $read_result = log_writer_read_output(120);
+  if ($read_result == -1) {
     Log (undef, "timed out reading from 'arv-put'");
+  } elsif ($read_result != 0) {
+    Log(undef, "failed to read arv-put log manifest to EOF");
   }
 
   waitpid($log_pipe_pid, 0);
-  $log_pipe_pid = $log_pipe_in = $log_pipe_out = undef;
   if ($?) {
-    Log("log_writer_finish: arv-put exited ".exit_status_s($?))
+    Log(undef, "log_writer_finish: arv-put exited " . exit_status_s($?))
   }
 
+  close($log_pipe_out);
+  my $arv_put_output = $log_pipe_out_buf;
+  $log_pipe_pid = $log_pipe_in = $log_pipe_out = $log_pipe_out_buf =
+      $log_pipe_out_select = undef;
+
   return $arv_put_output;
 }
 
@@ -1525,10 +1725,21 @@ sub save_meta
   return if $justcheckpoint;  # checkpointing is not relevant post-Warehouse.pm
   return unless log_writer_is_active();
 
-  my $loglocator = log_writer_finish();
-  Log (undef, "log manifest is $loglocator");
-  $Job->{'log'} = $loglocator;
-  $Job->update_attributes('log', $loglocator);
+  my $log_manifest = "";
+  if ($Job->{log}) {
+    my $prev_log_coll = api_call("collections/get", uuid => $Job->{log});
+    $log_manifest .= $prev_log_coll->{manifest_text};
+  }
+  $log_manifest .= log_writer_finish();
+
+  my $log_coll = api_call(
+    "collections/create", ensure_unique_name => 1, collection => {
+      manifest_text => $log_manifest,
+      owner_uuid => $Job->{owner_uuid},
+      name => sprintf("Log from %s job %s", $Job->{script}, $Job->{uuid}),
+    });
+  Log(undef, "log collection is " . $log_coll->{portable_data_hash});
+  $Job->update_attributes('log' => $log_coll->{portable_data_hash});
 }
 
 
@@ -1604,7 +1815,13 @@ sub srun
   my $show_cmd = Dumper($args);
   $show_cmd =~ s/(TOKEN\\*=)[^\s\']+/${1}[...]/g;
   $show_cmd =~ s/\n/ /g;
-  warn "starting: $show_cmd\n";
+  if ($opts->{fork}) {
+    Log(undef, "starting: $show_cmd");
+  } else {
+    # This is a child process: parent is in charge of reading our
+    # stderr and copying it to Log() if needed.
+    warn "starting: $show_cmd\n";
+  }
 
   if (defined $stdin) {
     my $child = open STDIN, "-|";
@@ -1813,8 +2030,14 @@ sub combined_git_archive {
   return $tar_contents;
 }
 
+sub set_nonblocking {
+  my $fh = shift;
+  my $flags = fcntl ($fh, F_GETFL, 0) or croak ($!);
+  fcntl ($fh, F_SETFL, $flags | O_NONBLOCK) or croak ($!);
+}
+
 __DATA__
-#!/usr/bin/perl
+#!/usr/bin/env perl
 #
 # This is crunch-job's internal dispatch script.  crunch-job running on the API
 # server invokes this script on individual compute nodes, or localhost if we're
@@ -1840,12 +2063,15 @@ use constant TASK_TEMPFAIL => 111;
 my %SDK_ENVVARS = ("perl/lib" => "PERLLIB", "ruby/lib" => "RUBYLIB");
 
 my $destdir = $ENV{"CRUNCH_SRC"};
-my $commit = $ENV{"CRUNCH_SRC_COMMIT"};
+my $archive_hash = $ENV{"CRUNCH_GIT_ARCHIVE_HASH"};
 my $repo = $ENV{"CRUNCH_SRC_URL"};
 my $install_dir = $ENV{"CRUNCH_INSTALL"} || (getcwd() . "/opt");
 my $job_work = $ENV{"JOB_WORK"};
 my $task_work = $ENV{"TASK_WORK"};
 
+open(STDOUT_ORIG, ">&", STDOUT);
+open(STDERR_ORIG, ">&", STDERR);
+
 for my $dir ($destdir, $job_work, $task_work) {
   if ($dir) {
     make_path $dir;
@@ -1857,11 +2083,6 @@ if ($task_work) {
   remove_tree($task_work, {keep_root => 1});
 }
 
-open(STDOUT_ORIG, ">&", STDOUT);
-open(STDERR_ORIG, ">&", STDERR);
-open(STDOUT, ">>", "$destdir.log");
-open(STDERR, ">&", STDOUT);
-
 ### Crunch script run mode
 if (@ARGV) {
   # We want to do routine logging during task 0 only.  This gives the user
@@ -1922,10 +2143,6 @@ if (@ARGV) {
     }
   }
 
-  close(STDOUT);
-  close(STDERR);
-  open(STDOUT, ">&", STDOUT_ORIG);
-  open(STDERR, ">&", STDERR_ORIG);
   exec(@ARGV);
   die "Cannot exec `@ARGV`: $!";
 }
@@ -1933,26 +2150,43 @@ if (@ARGV) {
 ### Installation mode
 open L, ">", "$destdir.lock" or die "$destdir.lock: $!";
 flock L, LOCK_EX;
-if (readlink ("$destdir.commit") eq $commit && -d $destdir) {
-  # This version already installed -> nothing to do.
+if (readlink ("$destdir.archive_hash") eq $archive_hash && -d $destdir) {
+  # This exact git archive (source + arvados sdk) is already installed
+  # here, so there's no need to reinstall it.
+
+  # We must consume our DATA section, though: otherwise the process
+  # feeding it to us will get SIGPIPE.
+  my $buf;
+  while (read(DATA, $buf, 65536)) { }
+
   exit(0);
 }
 
-unlink "$destdir.commit";
+unlink "$destdir.archive_hash";
 mkdir $destdir;
 
-if (!open(TARX, "|-", "tar", "-xC", $destdir)) {
-  die "Error launching 'tar -xC $destdir': $!";
-}
-# If we send too much data to tar in one write (> 4-5 MiB), it stops, and we
-# get SIGPIPE.  We must feed it data incrementally.
-my $tar_input;
-while (read(DATA, $tar_input, 65536)) {
-  print TARX $tar_input;
-}
-if(!close(TARX)) {
-  die "'tar -xC $destdir' exited $?: $!";
-}
+do {
+  # Ignore SIGPIPE: we check retval of close() instead. See perlipc(1).
+  local $SIG{PIPE} = "IGNORE";
+  warn "Extracting archive: $archive_hash\n";
+  # --ignore-zeros is sometimes necessary: depending on how much NUL
+  # padding tar -A put on our combined archive (which in turn depends
+  # on the length of the component archives), tar without
+  # --ignore-zeros may exit before consuming all of stdin, and the
+  # resulting SIGPIPE would make close() fail.
+  if (!open(TARX, "|-", "tar", "--ignore-zeros", "-xC", $destdir)) {
+    die "Error launching 'tar --ignore-zeros -xC $destdir': $!";
+  }
+  # If we send too much data to tar in one write (> 4-5 MiB), it stops, and we
+  # get SIGPIPE.  We must feed it data incrementally.
+  my $tar_input;
+  while (read(DATA, $tar_input, 65536)) {
+    print TARX $tar_input;
+  }
+  if(!close(TARX)) {
+    die "'tar --ignore-zeros -xC $destdir' exited $?: $!";
+  }
+};
 
 mkdir $install_dir;
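
Editor's note: the extraction strategy above translates directly to other runtimes. A hedged Go sketch of the same approach (illustrative helper name; Go's runtime already ignores SIGPIPE on pipe writes, so a prematurely exiting tar surfaces as a copy error or a non-zero Wait result rather than killing the process):

package extract

import (
	"fmt"
	"io"
	"os/exec"
)

// extractTo streams an archive into "tar --ignore-zeros -x",
// feeding it incrementally the same way the Perl loop above does.
func extractTo(destdir string, archive io.Reader) error {
	cmd := exec.Command("tar", "--ignore-zeros", "-xC", destdir)
	stdin, err := cmd.StdinPipe()
	if err != nil {
		return err
	}
	if err := cmd.Start(); err != nil {
		return fmt.Errorf("error launching tar: %v", err)
	}
	// io.Copy writes in modest chunks, so tar is never flooded.
	_, copyErr := io.Copy(stdin, archive)
	stdin.Close()
	if err := cmd.Wait(); err != nil {
		return fmt.Errorf("'tar --ignore-zeros -xC %s' failed: %v", destdir, err)
	}
	return copyErr
}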
 
@@ -1969,15 +2203,34 @@ if (-d $sdk_root) {
 }
 
 my $python_dir = "$install_dir/python";
-if ((-d $python_dir) and can_run("python2.7") and
-    (system("python2.7", "$python_dir/setup.py", "--quiet", "egg_info") != 0)) {
-  # egg_info failed, probably when it asked git for a build tag.
-  # Specify no build tag.
-  open(my $pysdk_cfg, ">>", "$python_dir/setup.cfg");
-  print $pysdk_cfg "\n[egg_info]\ntag_build =\n";
-  close($pysdk_cfg);
+if ((-d $python_dir) and can_run("python2.7")) {
+  open(my $egg_info_pipe, "-|",
+       "python2.7 \Q$python_dir/setup.py\E --quiet egg_info 2>&1 >/dev/null");
+  my @egg_info_errors = <$egg_info_pipe>;
+  close($egg_info_pipe);
+  if ($?) {
+    if (@egg_info_errors and ($egg_info_errors[-1] =~ /\bgit\b/)) {
+      # egg_info apparently failed because it couldn't ask git for a build tag.
+      # Specify no build tag.
+      open(my $pysdk_cfg, ">>", "$python_dir/setup.cfg");
+      print $pysdk_cfg "\n[egg_info]\ntag_build =\n";
+      close($pysdk_cfg);
+    } else {
+      my $egg_info_exit = $? >> 8;
+      foreach my $errline (@egg_info_errors) {
+        print STDERR_ORIG $errline;
+      }
+      warn "python setup.py egg_info failed: exit $egg_info_exit";
+      exit ($egg_info_exit || 1);
+    }
+  }
 }
 
+# Hide messages from the install script (unless it fails: shell_or_die
+# will show $destdir.log in that case).
+open(STDOUT, ">>", "$destdir.log");
+open(STDERR, ">&", STDOUT);
+
 if (-e "$destdir/crunch_scripts/install") {
     shell_or_die (undef, "$destdir/crunch_scripts/install", $install_dir);
 } elsif (!-e "./install.sh" && -e "./tests/autotests.sh") {
@@ -1987,10 +2240,10 @@ if (-e "$destdir/crunch_scripts/install") {
     shell_or_die (undef, "./install.sh", $install_dir);
 }
 
-if ($commit) {
-    unlink "$destdir.commit.new";
-    symlink ($commit, "$destdir.commit.new") or die "$destdir.commit.new: $!";
-    rename ("$destdir.commit.new", "$destdir.commit") or die "$destdir.commit: $!";
+if ($archive_hash) {
+    unlink "$destdir.archive_hash.new";
+    symlink ($archive_hash, "$destdir.archive_hash.new") or die "$destdir.archive_hash.new: $!";
+    rename ("$destdir.archive_hash.new", "$destdir.archive_hash") or die "$destdir.archive_hash: $!";
 }
 
 close L;
diff --git a/sdk/cli/test/binstub_clean_fail/mount b/sdk/cli/test/binstub_clean_fail/mount
new file mode 100755 (executable)
index 0000000..961ac28
--- /dev/null
@@ -0,0 +1,3 @@
+#!/bin/sh
+echo >&2 Failing mount stub was called
+exit 1
diff --git a/sdk/cli/test/binstub_docker_noop/docker.io b/sdk/cli/test/binstub_docker_noop/docker.io
new file mode 100755 (executable)
index 0000000..af3a4e4
--- /dev/null
@@ -0,0 +1,2 @@
+#!/bin/sh
+true
diff --git a/sdk/cli/test/binstub_sanity_check/docker.io b/sdk/cli/test/binstub_sanity_check/docker.io
new file mode 100755 (executable)
index 0000000..8f1569d
--- /dev/null
@@ -0,0 +1,2 @@
+#!/bin/sh
+exit 8
diff --git a/sdk/cli/test/binstub_sanity_check/true b/sdk/cli/test/binstub_sanity_check/true
new file mode 100755 (executable)
index 0000000..4b88b91
--- /dev/null
@@ -0,0 +1,2 @@
+#!/bin/sh
+exit 7
index 18bef403b761f52701fdc86b2919dac44de59e13..3dc4bdd434a101507fee3ebd8f2e5004e66cd49c 100644 (file)
@@ -7,8 +7,6 @@ class TestCollectionCreate < Minitest::Test
   end
 
   def test_small_collection
-    skip "Waiting unitl #4534 is implemented"
-
     uuid = Digest::MD5.hexdigest(foo_manifest) + '+' + foo_manifest.size.to_s
     out, err = capture_subprocess_io do
       assert_arv('--format', 'uuid', 'collection', 'create', '--collection', {
index 67dd399a2456fe4a7c2a2a2cf4d86401d409e6d6..5e58014cbfa10d3b9b67a8b7cddca8b8676f646c 100644 (file)
@@ -30,14 +30,10 @@ class TestArvGet < Minitest::Test
   end
 
   def test_file_to_dev_stdout
-    skip "Waiting unitl #4534 is implemented"
-
     test_file_to_stdout('/dev/stdout')
   end
 
   def test_file_to_stdout(specify_stdout_as='-')
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       assert_arv_get @@foo_manifest_locator + '/foo', specify_stdout_as
     end
@@ -46,8 +42,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_file_to_file
-    skip "Waiting unitl #4534 is implemented"
-
     remove_tmp_foo
     out, err = capture_subprocess_io do
       assert_arv_get @@foo_manifest_locator + '/foo', 'tmp/foo'
@@ -58,34 +52,30 @@ class TestArvGet < Minitest::Test
   end
 
   def test_file_to_file_no_overwrite_file
-    skip "Waiting unitl #4534 is implemented"
     File.open './tmp/foo', 'wb' do |f|
       f.write 'baz'
     end
     out, err = capture_subprocess_io do
       assert_arv_get false, @@foo_manifest_locator + '/foo', 'tmp/foo'
     end
-    assert_match /Error:/, err
+    assert_match /Local file tmp\/foo already exists/, err
     assert_equal '', out
     assert_equal 'baz', IO.read('tmp/foo')
   end
 
   def test_file_to_file_no_overwrite_file_in_dir
-    skip "Waiting unitl #4534 is implemented"
     File.open './tmp/foo', 'wb' do |f|
       f.write 'baz'
     end
     out, err = capture_subprocess_io do
       assert_arv_get false, @@foo_manifest_locator + '/', 'tmp/'
     end
-    assert_match /Error:/, err
+    assert_match /Local file tmp\/foo already exists/, err
     assert_equal '', out
     assert_equal 'baz', IO.read('tmp/foo')
   end
 
   def test_file_to_file_force_overwrite
-    skip "Waiting unitl #4534 is implemented"
-
     File.open './tmp/foo', 'wb' do |f|
       f.write 'baz'
     end
@@ -99,8 +89,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_file_to_file_skip_existing
-    skip "Waiting unitl #4534 is implemented"
-
     File.open './tmp/foo', 'wb' do |f|
       f.write 'baz'
     end
@@ -114,8 +102,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_file_to_dir
-    skip "Waiting unitl #4534 is implemented"
-
     remove_tmp_foo
     out, err = capture_subprocess_io do
       assert_arv_get @@foo_manifest_locator + '/foo', 'tmp/'
@@ -142,28 +128,22 @@ class TestArvGet < Minitest::Test
   end
 
   def test_nonexistent_block
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
-      assert_arv_get false, 'f1554a91e925d6213ce7c3103c5110c6'
+      assert_arv_get false, 'e796ab2294f3e48ec709ffa8d6daf58c'
     end
     assert_equal '', out
     assert_match /Error:/, err
   end
 
   def test_nonexistent_manifest
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
-      assert_arv_get false, 'f1554a91e925d6213ce7c3103c5110c6/', 'tmp/'
+      assert_arv_get false, 'acbd18db4cc2f85cedef654fccc4a4d8/', 'tmp/'
     end
     assert_equal '', out
     assert_match /Error:/, err
   end
 
   def test_manifest_root_to_dir
-    skip "Waiting unitl #4534 is implemented"
-
     remove_tmp_foo
     out, err = capture_subprocess_io do
       assert_arv_get '-r', @@foo_manifest_locator + '/', 'tmp/'
@@ -174,8 +154,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_manifest_root_to_dir_noslash
-    skip "Waiting unitl #4534 is implemented"
-
     remove_tmp_foo
     out, err = capture_subprocess_io do
       assert_arv_get '-r', @@foo_manifest_locator + '/', 'tmp'
@@ -186,8 +164,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_display_md5sum
-    skip "Waiting unitl #4534 is implemented"
-
     remove_tmp_foo
     out, err = capture_subprocess_io do
       assert_arv_get '-r', '--md5sum', @@foo_manifest_locator + '/', 'tmp/'
@@ -198,8 +174,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_md5sum_nowrite
-    skip "Waiting unitl #4534 is implemented"
-
     remove_tmp_foo
     out, err = capture_subprocess_io do
       assert_arv_get '-n', '--md5sum', @@foo_manifest_locator + '/', 'tmp/'
@@ -210,8 +184,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_sha1_nowrite
-    skip "Waiting unitl #4534 is implemented"
-
     remove_tmp_foo
     out, err = capture_subprocess_io do
       assert_arv_get '-n', '-r', '--hash', 'sha1', @@foo_manifest_locator+'/', 'tmp/'
@@ -222,8 +194,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_block_to_file
-    skip "Waiting unitl #4534 is implemented"
-
     remove_tmp_foo
     out, err = capture_subprocess_io do
       assert_arv_get @@foo_manifest_locator, 'tmp/foo'
@@ -236,8 +206,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_create_directory_tree
-    skip "Waiting unitl #4534 is implemented"
-
     `rm -rf ./tmp/arv-get-test/`
     Dir.mkdir './tmp/arv-get-test'
     out, err = capture_subprocess_io do
@@ -249,8 +217,6 @@ class TestArvGet < Minitest::Test
   end
 
   def test_create_partial_directory_tree
-    skip "Waiting unitl #4534 is implemented"
-
     `rm -rf ./tmp/arv-get-test/`
     Dir.mkdir './tmp/arv-get-test'
     out, err = capture_subprocess_io do
index 73513db56cb17ee5f6d88d151205f437e6d22107..2f20e18440a2ff61dde6b748d3b327587530b142 100644 (file)
@@ -22,8 +22,6 @@ class TestArvPut < Minitest::Test
   end
 
   def test_raw_stdin
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       r,w = IO.pipe
       wpid = fork do
@@ -41,8 +39,6 @@ class TestArvPut < Minitest::Test
   end
 
   def test_raw_file
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       assert arv_put('--raw', './tmp/foo')
     end
@@ -52,8 +48,6 @@ class TestArvPut < Minitest::Test
   end
 
   def test_raw_empty_file
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       assert arv_put('--raw', './tmp/empty_file')
     end
@@ -83,8 +77,6 @@ class TestArvPut < Minitest::Test
   end
 
   def test_filename_arg_with_empty_file
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       assert arv_put('--filename', 'foo', './tmp/empty_file')
     end
@@ -94,8 +86,6 @@ class TestArvPut < Minitest::Test
   end
 
   def test_as_stream
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       assert arv_put('--as-stream', './tmp/foo')
     end
@@ -105,8 +95,6 @@ class TestArvPut < Minitest::Test
   end
 
   def test_progress
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       assert arv_put('--manifest', '--progress', './tmp/foo')
     end
@@ -115,8 +103,6 @@ class TestArvPut < Minitest::Test
   end
 
   def test_batch_progress
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       assert arv_put('--manifest', '--batch-progress', './tmp/foo')
     end
@@ -136,20 +122,14 @@ class TestArvPut < Minitest::Test
   end
 
   def test_read_from_implicit_stdin
-    skip "Waiting unitl #4534 is implemented"
-
     test_read_from_stdin(specify_stdin_as='--manifest')
   end
 
   def test_read_from_dev_stdin
-    skip "Waiting unitl #4534 is implemented"
-
     test_read_from_stdin(specify_stdin_as='/dev/stdin')
   end
 
   def test_read_from_stdin(specify_stdin_as='-')
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       r,w = IO.pipe
       wpid = fork do
@@ -168,22 +148,16 @@ class TestArvPut < Minitest::Test
   end
 
   def test_read_from_implicit_stdin_implicit_manifest
-    skip "Waiting unitl #4534 is implemented"
-
     test_read_from_stdin_implicit_manifest(specify_stdin_as=nil,
                                            expect_filename='stdin')
   end
 
   def test_read_from_dev_stdin_implicit_manifest
-    skip "Waiting unitl #4534 is implemented"
-
     test_read_from_stdin_implicit_manifest(specify_stdin_as='/dev/stdin')
   end
 
   def test_read_from_stdin_implicit_manifest(specify_stdin_as='-',
                                              expect_filename=nil)
-    skip "Waiting unitl #4534 is implemented"
-
     expect_filename = expect_filename || specify_stdin_as.split('/').last
     out, err = capture_subprocess_io do
       r,w = IO.pipe
index 8c8d1d8331ae05fcbda64a65289732188c66bcd8..cac89b37bc0555c4929c6efadf873c32aed01297 100644 (file)
@@ -5,8 +5,6 @@ class TestRunPipelineInstance < Minitest::Test
   end
 
   def test_run_pipeline_instance_get_help
-    skip "Waiting unitl #4534 is implemented"
-
     out, err = capture_subprocess_io do
       system ('arv-run-pipeline-instance -h')
     end
index a5a1c94fff29227e0944afcae08383529cfe0b33..f4eba4651cbcd06494c41d1e05311dac663f65ed 100644 (file)
@@ -9,7 +9,7 @@ end
 class TestArvTag < Minitest::Test
 
   def test_no_args
-    skip "Waiting unitl #4534 is implemented"
+    skip "Waiting until #4534 is implemented"
 
     # arv-tag exits with failure if run with no args
     out, err = capture_subprocess_io do
diff --git a/sdk/cli/test/test_crunch-job.rb b/sdk/cli/test/test_crunch-job.rb
new file mode 100644 (file)
index 0000000..22d756a
--- /dev/null
@@ -0,0 +1,126 @@
+require 'minitest/autorun'
+
+class TestCrunchJob < Minitest::Test
+  SPECIAL_EXIT = {
+    EX_RETRY_UNLOCKED: 93,
+    EX_TEMPFAIL: 75,
+  }
+
+  JOBSPEC = {
+    grep_local: {
+      script: 'grep',
+      script_version: 'master',
+      repository: File.absolute_path('../../../..', __FILE__),
+      script_parameters: {foo: 'bar'},
+    },
+  }
+
+  def setup
+  end
+
+  def crunchjob
+    File.absolute_path '../../bin/crunch-job', __FILE__
+  end
+
+  # Return environment suitable for running crunch-job.
+  def crunchenv opts={}
+    env = ENV.to_h
+    env['CRUNCH_REFRESH_TRIGGER'] =
+      File.absolute_path('../../../../tmp/crunch-refresh-trigger', __FILE__)
+    env
+  end
+
+  def jobspec label
+    JOBSPEC[label].dup
+  end
+
+  # Encode job record to json and run it with crunch-job.
+  #
+  # opts[:binstubs] is an array of X where ./binstub_X is added to
+  # PATH in order to mock system programs.
+  def tryjobrecord jobrecord, opts={}
+    env = crunchenv
+    (opts[:binstubs] || []).each do |binstub|
+      env['PATH'] = File.absolute_path('../binstub_'+binstub, __FILE__) + ':' + env['PATH']
+    end
+    system env, crunchjob, '--job', jobrecord.to_json
+  end
+
+  def test_bogus_json
+    out, err = capture_subprocess_io do
+      system crunchenv, crunchjob, '--job', '"}{"'
+    end
+    assert_equal false, $?.success?
+    # Must not conflict with our special exit statuses
+    assert_jobfail $?
+    assert_match /JSON/, err
+  end
+
+  def test_fail_sanity_check
+    out, err = capture_subprocess_io do
+      j = {}
+      tryjobrecord j, binstubs: ['sanity_check']
+    end
+    assert_equal 75, $?.exitstatus
+    assert_match /Sanity check failed: 7/, err
+  end
+
+  def test_fail_docker_sanity_check
+    out, err = capture_subprocess_io do
+      j = {}
+      j[:docker_image_locator] = '4d449b9d34f2e2222747ef79c53fa3ff+1234'
+      tryjobrecord j, binstubs: ['sanity_check']
+    end
+    assert_equal 75, $?.exitstatus
+    assert_match /Sanity check failed: 8/, err
+  end
+
+  def test_no_script_specified
+    out, err = capture_subprocess_io do
+      j = jobspec :grep_local
+      j.delete :script
+      tryjobrecord j
+    end
+    assert_match /No script specified/, err
+    assert_jobfail $?
+  end
+
+  def test_fail_clean_tmp
+    out, err = capture_subprocess_io do
+      j = jobspec :grep_local
+      tryjobrecord j, binstubs: ['clean_fail']
+    end
+    assert_match /Failing mount stub was called/, err
+    assert_match /Clean work dirs: exit 1\n$/, err
+    assert_equal SPECIAL_EXIT[:EX_RETRY_UNLOCKED], $?.exitstatus
+  end
+
+  def test_docker_image_missing
+    skip 'API bug: it refuses to create this job in Running state'
+    out, err = capture_subprocess_io do
+      j = jobspec :grep_local
+      j[:docker_image_locator] = '4d449b9d34f2e2222747ef79c53fa3ff+1234'
+      tryjobrecord j, binstubs: ['docker_noop']
+    end
+    assert_match /No Docker image hash found from locator/, err
+    assert_jobfail $?
+  end
+
+  def test_script_version_not_found_in_repository
+    bogus_version = 'f8b72707c1f5f740dbf1ed56eb429a36e0dee770'
+    out, err = capture_subprocess_io do
+      j = jobspec :grep_local
+      j[:script_version] = bogus_version
+      tryjobrecord j
+    end
+    assert_match /'#{bogus_version}' not found, giving up/, err
+    assert_jobfail $?
+  end
+
+  # Ensure procstatus is not interpreted as a temporary infrastructure
+  # problem. Would be assert_http_4xx if this were http.
+  def assert_jobfail procstatus
+    refute_includes SPECIAL_EXIT.values, procstatus.exitstatus
+    assert_equal false, procstatus.success?
+  end
+end
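
Editor's note: these tests mock system programs by prepending binstub directories to PATH before spawning crunch-job. The same technique sketched in Go for comparison (runWithBinstubs is a hypothetical helper, not part of this change):

package binstub

import (
	"os"
	"os/exec"
	"path/filepath"
	"testing"
)

// runWithBinstubs runs cmd with ./binstub_<name> directories prepended
// to PATH, so stub executables shadow the real programs.
func runWithBinstubs(t *testing.T, cmd *exec.Cmd, names ...string) error {
	path := os.Getenv("PATH")
	for _, name := range names {
		dir, err := filepath.Abs("binstub_" + name)
		if err != nil {
			t.Fatal(err)
		}
		path = dir + string(os.PathListSeparator) + path
	}
	cmd.Env = append(os.Environ(), "PATH="+path)
	return cmd.Run()
}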
diff --git a/sdk/cwl/.gitignore b/sdk/cwl/.gitignore
new file mode 120000 (symlink)
index 0000000..1399fd4
--- /dev/null
@@ -0,0 +1 @@
+../python/.gitignore
\ No newline at end of file
diff --git a/sdk/cwl/README.rst b/sdk/cwl/README.rst
new file mode 100644 (file)
index 0000000..743b6c6
--- /dev/null
@@ -0,0 +1 @@
+Arvados Common Workflow Language (CWL) runner.
diff --git a/sdk/cwl/arvados_cwl/__init__.py b/sdk/cwl/arvados_cwl/__init__.py
new file mode 100644 (file)
index 0000000..f3298ec
--- /dev/null
@@ -0,0 +1,295 @@
+#!/usr/bin/env python
+
+import argparse
+import arvados
+import arvados.events
+import arvados.commands.keepdocker
+import arvados.commands.run
+import cwltool.draft2tool
+import cwltool.workflow
+import cwltool.main
+import threading
+import cwltool.docker
+import fnmatch
+import logging
+import re
+import os
+from cwltool.process import get_feature
+
+logger = logging.getLogger('arvados.cwl-runner')
+logger.setLevel(logging.INFO)
+
+def arv_docker_get_image(api_client, dockerRequirement, pull_image):
+    if "dockerImageId" not in dockerRequirement and "dockerPull" in dockerRequirement:
+        dockerRequirement["dockerImageId"] = dockerRequirement["dockerPull"]
+
+    sp = dockerRequirement["dockerImageId"].split(":")
+    image_name = sp[0]
+    image_tag = sp[1] if len(sp) > 1 else None
+
+    images = arvados.commands.keepdocker.list_images_in_arv(api_client, 3,
+                                                            image_name=image_name,
+                                                            image_tag=image_tag)
+
+    if not images:
+        imageId = cwltool.docker.get_image(dockerRequirement, pull_image)
+        args = [image_name]
+        if image_tag:
+            args.append(image_tag)
+        arvados.commands.keepdocker.main(args)
+
+    return dockerRequirement["dockerImageId"]
+
+class CollectionFsAccess(cwltool.draft2tool.StdFsAccess):
+    def __init__(self, basedir):
+        self.collections = {}
+        self.basedir = basedir
+
+    def get_collection(self, path):
+        p = path.split("/")
+        if arvados.util.keep_locator_pattern.match(p[0]):
+            if p[0] not in self.collections:
+                self.collections[p[0]] = arvados.collection.CollectionReader(p[0])
+            return (self.collections[p[0]], "/".join(p[1:]))
+        else:
+            return (None, path)
+
+    def _match(self, collection, patternsegments, parent):
+        ret = []
+        for filename in collection:
+            if fnmatch.fnmatch(filename, patternsegments[0]):
+                cur = os.path.join(parent, filename)
+                if len(patternsegments) == 1:
+                    ret.append(cur)
+                else:
+                    ret.extend(self._match(collection[filename], patternsegments[1:], cur))
+        return ret
+
+    def glob(self, pattern):
+        collection, rest = self.get_collection(pattern)
+        patternsegments = rest.split("/")
+        return self._match(collection, patternsegments, collection.manifest_locator())
+
+    def open(self, fn, mode):
+        collection, rest = self.get_collection(fn)
+        if collection:
+            return collection.open(rest, mode)
+        else:
+            return open(self._abs(fn), mode)
+
+    def exists(self, fn):
+        collection, rest = self.get_collection(fn)
+        if collection:
+            return collection.exists(rest)
+        else:
+            return os.path.exists(self._abs(fn))
+
+class ArvadosJob(object):
+    def __init__(self, runner):
+        self.arvrunner = runner
+        self.running = False
+
+    def run(self, dry_run=False, pull_image=True, **kwargs):
+        script_parameters = {
+            "command": self.command_line
+        }
+        runtime_constraints = {}
+
+        if self.generatefiles:
+            vwd = arvados.collection.Collection()
+            for t in self.generatefiles:
+                if isinstance(self.generatefiles[t], dict):
+                    src, rest = self.arvrunner.fs_access.get_collection(self.generatefiles[t]["path"][6:])
+                    vwd.copy(rest, t, source_collection=src)
+                else:
+                    with vwd.open(t, "w") as f:
+                        f.write(self.generatefiles[t])
+            vwd.save_new()
+            script_parameters["task.vwd"] = vwd.portable_data_hash()
+
+        script_parameters["task.env"] = {"TMPDIR": "$(task.tmpdir)"}
+        if self.environment:
+            script_parameters["task.env"].update(self.environment)
+
+        if self.stdin:
+            script_parameters["task.stdin"] = self.pathmapper.mapper(self.stdin)[1]
+
+        if self.stdout:
+            script_parameters["task.stdout"] = self.stdout
+
+        (docker_req, docker_is_req) = get_feature(self, "DockerRequirement")
+        if docker_req and kwargs.get("use_container") is not False:
+            runtime_constraints["docker_image"] = arv_docker_get_image(self.arvrunner.api, docker_req, pull_image)
+            runtime_constraints["arvados_sdk_version"] = "master"
+
+        response = self.arvrunner.api.jobs().create(body={
+            "script": "run-command",
+            "repository": "arvados",
+            "script_version": "master",
+            "script_parameters": script_parameters,
+            "runtime_constraints": runtime_constraints
+        }, find_or_create=kwargs.get("enable_reuse", True)).execute()
+
+        self.arvrunner.jobs[response["uuid"]] = self
+
+        logger.info("Job %s is %s", response["uuid"], response["state"])
+
+        if response["state"] in ("Complete", "Failed", "Cancelled"):
+            self.done(response)
+
+    def done(self, record):
+        try:
+            if record["state"] == "Complete":
+                processStatus = "success"
+            else:
+                processStatus = "permanentFail"
+
+            try:
+                outputs = {}
+                outputs = self.collect_outputs(record["output"])
+            except Exception as e:
+                logger.warn(str(e))
+                processStatus = "permanentFail"
+
+            self.output_callback(outputs, processStatus)
+        finally:
+            del self.arvrunner.jobs[record["uuid"]]
+
+class ArvPathMapper(cwltool.pathmapper.PathMapper):
+    def __init__(self, arvrunner, referenced_files, basedir, **kwargs):
+        self._pathmap = {}
+        uploadfiles = []
+
+        pdh_path = re.compile(r'^[0-9a-f]{32}\+\d+/.+')
+
+        for src in referenced_files:
+            if isinstance(src, basestring) and pdh_path.match(src):
+                self._pathmap[src] = (src, "/keep/%s" % src)
+            else:
+                ab = src if os.path.isabs(src) else os.path.join(basedir, src)
+                st = arvados.commands.run.statfile("", ab)
+                if kwargs.get("conformance_test"):
+                    self._pathmap[src] = (src, ab)
+                elif isinstance(st, arvados.commands.run.UploadFile):
+                    uploadfiles.append((src, ab, st))
+                elif isinstance(st, arvados.commands.run.ArvFile):
+                    self._pathmap[src] = (ab, st.fn)
+                else:
+                    raise cwltool.workflow.WorkflowException("Input file path '%s' is invalid" % st)
+
+        if uploadfiles:
+            arvados.commands.run.uploadfiles([u[2] for u in uploadfiles], arvrunner.api, dry_run=kwargs.get("dry_run"), num_retries=3)
+
+        for src, ab, st in uploadfiles:
+            self._pathmap[src] = (ab, st.fn)
+
+
+class ArvadosCommandTool(cwltool.draft2tool.CommandLineTool):
+    def __init__(self, arvrunner, toolpath_object, **kwargs):
+        super(ArvadosCommandTool, self).__init__(toolpath_object, **kwargs)
+        self.arvrunner = arvrunner
+
+    def makeJobRunner(self):
+        return ArvadosJob(self.arvrunner)
+
+    def makePathMapper(self, reffiles, input_basedir, **kwargs):
+        return ArvPathMapper(self.arvrunner, reffiles, input_basedir, **kwargs)
+
+
+class ArvCwlRunner(object):
+    def __init__(self, api_client):
+        self.api = api_client
+        self.jobs = {}
+        self.lock = threading.Lock()
+        self.cond = threading.Condition(self.lock)
+        self.final_output = None
+
+    def arvMakeTool(self, toolpath_object, **kwargs):
+        if "class" in toolpath_object and toolpath_object["class"] == "CommandLineTool":
+            return ArvadosCommandTool(self, toolpath_object, **kwargs)
+        else:
+            return cwltool.workflow.defaultMakeTool(toolpath_object, **kwargs)
+
+    def output_callback(self, out, processStatus):
+        if processStatus == "success":
+            logger.info("Overall job status is %s", processStatus)
+        else:
+            logger.warn("Overall job status is %s", processStatus)
+        self.final_output = out
+
+    def on_message(self, event):
+        if "object_uuid" in event:
+            if event["object_uuid"] in self.jobs and event["event_type"] == "update":
+                if event["properties"]["new_attributes"]["state"] == "Running" and self.jobs[event["object_uuid"]].running is False:
+                    logger.info("Job %s is Running", event["object_uuid"])
+                    with self.lock:
+                        self.jobs[event["object_uuid"]].running = True
+                elif event["properties"]["new_attributes"]["state"] in ("Complete", "Failed", "Cancelled"):
+                    logger.info("Job %s is %s", event["object_uuid"], event["properties"]["new_attributes"]["state"])
+                    try:
+                        self.cond.acquire()
+                        self.jobs[event["object_uuid"]].done(event["properties"]["new_attributes"])
+                        self.cond.notify()
+                    finally:
+                        self.cond.release()
+
+    def arvExecutor(self, tool, job_order, input_basedir, args, **kwargs):
+        events = arvados.events.subscribe(arvados.api('v1'), [["object_uuid", "is_a", "arvados#job"]], self.on_message)
+
+        self.fs_access = CollectionFsAccess(input_basedir)
+
+        kwargs["fs_access"] = self.fs_access
+        kwargs["enable_reuse"] = args.enable_reuse
+
+        if kwargs.get("conformance_test"):
+            return cwltool.main.single_job_executor(tool, job_order, input_basedir, args, **kwargs)
+        else:
+            jobiter = tool.job(job_order,
+                            input_basedir,
+                            self.output_callback,
+                            **kwargs)
+
+            for runnable in jobiter:
+                if runnable:
+                    with self.lock:
+                        runnable.run(**kwargs)
+                else:
+                    if self.jobs:
+                        try:
+                            self.cond.acquire()
+                            self.cond.wait()
+                        finally:
+                            self.cond.release()
+                    else:
+                        logger.error("Workflow cannot make any more progress.")
+                        break
+
+            while self.jobs:
+                try:
+                    self.cond.acquire()
+                    self.cond.wait()
+                finally:
+                    self.cond.release()
+
+            events.close()
+
+            if self.final_output is None:
+                raise cwltool.workflow.WorkflowException("Workflow did not return a result.")
+
+            return self.final_output
+
+
+def main(args, stdout, stderr, api_client=None):
+    runner = ArvCwlRunner(api_client=api_client if api_client else arvados.api('v1'))
+    args.append("--leave-outputs")
+    parser = cwltool.main.arg_parser()
+    exgroup = parser.add_mutually_exclusive_group()
+    exgroup.add_argument("--enable-reuse", action="store_true",
+                        default=False, dest="enable_reuse",
+                        help="")
+    exgroup.add_argument("--disable-reuse", action="store_false",
+                        default=False, dest="enable_reuse",
+                        help="")
+
+    return cwltool.main.main(args, executor=runner.arvExecutor, makeTool=runner.arvMakeTool, parser=parser)
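
Editor's note: ArvCwlRunner coordinates the websocket event callback and the main loop with a condition variable — done() removes a job from the table and notifies, while arvExecutor waits until the table is empty. A minimal Go sketch of the same pattern (jobTracker is a type invented for illustration):

package runner

import "sync"

// jobTracker mirrors the Python pattern above: an event-handling
// goroutine marks jobs done and signals a condition variable; the
// main loop waits until no jobs remain.
type jobTracker struct {
	mu   sync.Mutex
	cond *sync.Cond
	jobs map[string]bool
}

func newJobTracker() *jobTracker {
	t := &jobTracker{jobs: map[string]bool{}}
	t.cond = sync.NewCond(&t.mu)
	return t
}

// done is called from the event-handling goroutine when a job
// reaches a final state.
func (t *jobTracker) done(uuid string) {
	t.mu.Lock()
	defer t.mu.Unlock()
	delete(t.jobs, uuid)
	t.cond.Signal()
}

// waitAll blocks until every tracked job has finished.
func (t *jobTracker) waitAll() {
	t.mu.Lock()
	defer t.mu.Unlock()
	for len(t.jobs) > 0 {
		t.cond.Wait()
	}
}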
diff --git a/sdk/cwl/bin/cwl-runner b/sdk/cwl/bin/cwl-runner
new file mode 100755 (executable)
index 0000000..f31aefd
--- /dev/null
@@ -0,0 +1,7 @@
+#!/usr/bin/env python
+
+import sys
+
+from arvados_cwl import main
+
+sys.exit(main(sys.argv[1:], sys.stdout, sys.stderr))
diff --git a/sdk/cwl/gittaggers.py b/sdk/cwl/gittaggers.py
new file mode 120000 (symlink)
index 0000000..d59c02c
--- /dev/null
@@ -0,0 +1 @@
+../python/gittaggers.py
\ No newline at end of file
diff --git a/sdk/cwl/setup.py b/sdk/cwl/setup.py
new file mode 100644 (file)
index 0000000..2fd03f7
--- /dev/null
@@ -0,0 +1,37 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import setuptools.command.egg_info as egg_info_cmd
+
+from setuptools import setup, find_packages
+
+SETUP_DIR = os.path.dirname(__file__) or '.'
+README = os.path.join(SETUP_DIR, 'README.rst')
+
+try:
+    import gittaggers
+    tagger = gittaggers.EggInfoFromGit
+except ImportError:
+    tagger = egg_info_cmd.egg_info
+
+setup(name='arvados-cwl-runner',
+      version='1.0',
+      description='Arvados Common Workflow Language runner',
+      long_description=open(README).read(),
+      author='Arvados',
+      author_email='info@arvados.org',
+      url="https://arvados.org",
+      download_url="https://github.com/curoverse/arvados.git",
+      license='Apache 2.0',
+      packages=find_packages(),
+      scripts=[
+          'bin/cwl-runner'
+      ],
+      install_requires=[
+          'cwltool',
+          'arvados-python-client'
+      ],
+      zip_safe=True,
+      cmdclass={'egg_info': tagger},
+      )
index 4c16398397fa8881ffe8060f45b2e910007d506e..1cce0a7fc92d24e21fa694add86c75c63952eb46 100644 (file)
@@ -16,17 +16,42 @@ import (
        "strings"
 )
 
-// Errors
+type StringMatcher func(string) bool
+
+var UUIDMatch StringMatcher = regexp.MustCompile(`^[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}$`).MatchString
+var PDHMatch StringMatcher = regexp.MustCompile(`^[0-9a-f]{32}\+\d+$`).MatchString
+
 var MissingArvadosApiHost = errors.New("Missing required environment variable ARVADOS_API_HOST")
 var MissingArvadosApiToken = errors.New("Missing required environment variable ARVADOS_API_TOKEN")
+var ErrInvalidArgument = errors.New("Invalid argument")
+
+// Indicates an error that was returned by the API server.
+type APIServerError struct {
+       // Address of server returning error, of the form "host:port".
+       ServerAddress string
+
+       // Components of server response.
+       HttpStatusCode    int
+       HttpStatusMessage string
 
-type ArvadosApiError struct {
-       error
-       HttpStatusCode int
-       HttpStatus string
+       // Additional error details from response body.
+       ErrorDetails []string
 }
 
-func (e ArvadosApiError) Error() string { return e.error.Error() }
+func (e APIServerError) Error() string {
+       if len(e.ErrorDetails) > 0 {
+               return fmt.Sprintf("arvados API server error: %s (%d: %s) returned by %s",
+                       strings.Join(e.ErrorDetails, "; "),
+                       e.HttpStatusCode,
+                       e.HttpStatusMessage,
+                       e.ServerAddress)
+       } else {
+               return fmt.Sprintf("arvados API server error: %d: %s returned by %s",
+                       e.HttpStatusCode,
+                       e.HttpStatusMessage,
+                       e.ServerAddress)
+       }
+}
 
 // Helper type so we don't have to write out 'map[string]interface{}' every time.
 type Dict map[string]interface{}
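
Editor's note: with the typed error, callers can branch on the structured fields instead of parsing message text. A short illustrative example from an importing package's point of view:

package example

import (
	"net/http"

	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
)

// isNotFound shows the intended usage: assert the error to
// APIServerError and inspect the HTTP status code.
func isNotFound(err error) bool {
	ase, ok := err.(arvadosclient.APIServerError)
	return ok && ase.HttpStatusCode == http.StatusNotFound
}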
@@ -48,17 +73,20 @@ type ArvadosClient struct {
        // If true, sets the X-External-Client header to indicate
        // the client is outside the cluster.
        External bool
+
+       // Discovery document
+       DiscoveryDoc Dict
 }
 
-// Create a new KeepClient, initialized with standard Arvados environment
+// Create a new ArvadosClient, initialized with standard Arvados environment
 // variables ARVADOS_API_HOST, ARVADOS_API_TOKEN, and (optionally)
 // ARVADOS_API_HOST_INSECURE.
-func MakeArvadosClient() (kc ArvadosClient, err error) {
+func MakeArvadosClient() (ac ArvadosClient, err error) {
        var matchTrue = regexp.MustCompile("^(?i:1|yes|true)$")
        insecure := matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE"))
        external := matchTrue.MatchString(os.Getenv("ARVADOS_EXTERNAL_CLIENT"))
 
-       kc = ArvadosClient{
+       ac = ArvadosClient{
                ApiServer:   os.Getenv("ARVADOS_API_HOST"),
                ApiToken:    os.Getenv("ARVADOS_API_TOKEN"),
                ApiInsecure: insecure,
@@ -66,38 +94,31 @@ func MakeArvadosClient() (kc ArvadosClient, err error) {
                        TLSClientConfig: &tls.Config{InsecureSkipVerify: insecure}}},
                External: external}
 
-       if kc.ApiServer == "" {
-               return kc, MissingArvadosApiHost
+       if ac.ApiServer == "" {
+               return ac, MissingArvadosApiHost
        }
-       if kc.ApiToken == "" {
-               return kc, MissingArvadosApiToken
+       if ac.ApiToken == "" {
+               return ac, MissingArvadosApiToken
        }
 
-       return kc, err
+       return ac, err
 }
 
-// Low-level access to a resource.
-//
-//   method - HTTP method, one of GET, HEAD, PUT, POST or DELETE
-//   resource - the arvados resource to act on
-//   uuid - the uuid of the specific item to access (may be empty)
-//   action - sub-action to take on the resource or uuid (may be empty)
-//   parameters - method parameters
-//
-// return
-//   reader - the body reader, or nil if there was an error
-//   err - error accessing the resource, or nil if no error
-func (this ArvadosClient) CallRaw(method string, resource string, uuid string, action string, parameters Dict) (reader io.ReadCloser, err error) {
+// CallRaw is the same as Call() but returns a Reader that reads the
+// response body, instead of taking an output object.
+func (c ArvadosClient) CallRaw(method string, resourceType string, uuid string, action string, parameters Dict) (reader io.ReadCloser, err error) {
        var req *http.Request
 
        u := url.URL{
                Scheme: "https",
-               Host:   this.ApiServer}
+               Host:   c.ApiServer}
 
-       u.Path = "/arvados/v1"
+       if resourceType != API_DISCOVERY_RESOURCE {
+               u.Path = "/arvados/v1"
+       }
 
-       if resource != "" {
-               u.Path = u.Path + "/" + resource
+       if resourceType != "" {
+               u.Path = u.Path + "/" + resourceType
        }
        if uuid != "" {
                u.Path = u.Path + "/" + uuid
@@ -110,12 +131,11 @@ func (this ArvadosClient) CallRaw(method string, resource string, uuid string, a
                parameters = make(Dict)
        }
 
-       parameters["format"] = "json"
-
        vals := make(url.Values)
        for k, v := range parameters {
-               m, err := json.Marshal(v)
-               if err == nil {
+               if s, ok := v.(string); ok {
+                       vals.Set(k, s)
+               } else if m, err := json.Marshal(v); err == nil {
                        vals.Set(k, string(m))
                }
        }
@@ -133,14 +153,14 @@ func (this ArvadosClient) CallRaw(method string, resource string, uuid string, a
        }
 
        // Add api token header
-       req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", this.ApiToken))
-       if this.External {
+       req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", c.ApiToken))
+       if c.External {
                req.Header.Add("X-External-Client", "1")
        }
 
        // Make the request
        var resp *http.Response
-       if resp, err = this.Client.Do(req); err != nil {
+       if resp, err = c.Client.Do(req); err != nil {
                return nil, err
        }
 
@@ -149,45 +169,52 @@ func (this ArvadosClient) CallRaw(method string, resource string, uuid string, a
        }
 
        defer resp.Body.Close()
-       errorText := fmt.Sprintf("API response: %s", resp.Status)
+       return nil, newAPIServerError(c.ApiServer, resp)
+}
+
+func newAPIServerError(ServerAddress string, resp *http.Response) APIServerError {
+
+       ase := APIServerError{
+               ServerAddress:     ServerAddress,
+               HttpStatusCode:    resp.StatusCode,
+               HttpStatusMessage: resp.Status}
 
        // If the response body has {"errors":["reason1","reason2"]}
        // then return those reasons.
        var errInfo = Dict{}
        if err := json.NewDecoder(resp.Body).Decode(&errInfo); err == nil {
                if errorList, ok := errInfo["errors"]; ok {
-                       var errorStrings []string
                        if errArray, ok := errorList.([]interface{}); ok {
                                for _, errItem := range errArray {
                                        // We expect an array of strings here.
                                        // Non-strings will be passed along
                                        // JSON-encoded.
                                        if s, ok := errItem.(string); ok {
-                                               errorStrings = append(errorStrings, s)
+                                               ase.ErrorDetails = append(ase.ErrorDetails, s)
                                        } else if j, err := json.Marshal(errItem); err == nil {
-                                               errorStrings = append(errorStrings, string(j))
+                                               ase.ErrorDetails = append(ase.ErrorDetails, string(j))
                                        }
                                }
-                               errorText = strings.Join(errorStrings, "; ")
                        }
                }
        }
-       return nil, ArvadosApiError{errors.New(errorText), resp.StatusCode, resp.Status}
+       return ase
 }
 
-// Access to a resource.
+// Call an API endpoint and parse the JSON response into an object.
+//
+//   method - HTTP method: GET, HEAD, PUT, POST, PATCH or DELETE.
+//   resourceType - the type of arvados resource to act on (e.g., "collections", "pipeline_instances").
+//   uuid - the uuid of the specific item to access. May be empty.
+//   action - API method name (e.g., "lock"). This is often empty if implied by method and uuid.
+//   parameters - method parameters.
+//   output - a map or annotated struct which is a legal target for encoding/json/Decoder.
 //
-//   method - HTTP method, one of GET, HEAD, PUT, POST or DELETE
-//   resource - the arvados resource to act on
-//   uuid - the uuid of the specific item to access (may be empty)
-//   action - sub-action to take on the resource or uuid (may be empty)
-//   parameters - method parameters
-//   output - a map or annotated struct which is a legal target for encoding/json/Decoder
-// return
-//   err - error accessing the resource, or nil if no error
-func (this ArvadosClient) Call(method string, resource string, uuid string, action string, parameters Dict, output interface{}) (err error) {
-       var reader io.ReadCloser
-       reader, err = this.CallRaw(method, resource, uuid, action, parameters)
+// Returns a non-nil error if an error occurs making the API call, the
+// API responds with a non-successful HTTP status, or an error occurs
+// parsing the response body.
+func (c ArvadosClient) Call(method string, resourceType string, uuid string, action string, parameters Dict, output interface{}) error {
+       reader, err := c.CallRaw(method, resourceType, uuid, action, parameters)
        if reader != nil {
                defer reader.Close()
        }
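
Editor's note: a minimal usage sketch of the rewritten Call API from an importing package (the collection UUID is a placeholder in the test-fixture style used elsewhere in this changeset):

package example

import (
	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
)

func demo() error {
	arv, err := arvadosclient.MakeArvadosClient()
	if err != nil {
		return err
	}
	var coll map[string]interface{}
	// GET /arvados/v1/collections/{uuid}
	if err := arv.Call("GET", "collections", "zzzzz-4zz18-abcdeabcdeabcde", "", nil, &coll); err != nil {
		return err
	}
	var logs map[string]interface{}
	// GET /arvados/v1/logs?limit=10 — non-string parameters are
	// JSON-encoded into the query string by CallRaw.
	return arv.Call("GET", "logs", "", "", arvadosclient.Dict{"limit": 10}, &logs)
}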
@@ -204,48 +231,58 @@ func (this ArvadosClient) Call(method string, resource string, uuid string, acti
        return nil
 }
 
-// Create a new instance of a resource.
-//
-//   resource - the arvados resource on which to create an item
-//   parameters - method parameters
-//   output - a map or annotated struct which is a legal target for encoding/json/Decoder
-// return
-//   err - error accessing the resource, or nil if no error
-func (this ArvadosClient) Create(resource string, parameters Dict, output interface{}) (err error) {
-       return this.Call("POST", resource, "", "", parameters, output)
+// Create a new resource. See Call for argument descriptions.
+func (c ArvadosClient) Create(resourceType string, parameters Dict, output interface{}) error {
+       return c.Call("POST", resourceType, "", "", parameters, output)
 }
 
-// Delete an instance of a resource.
-//
-//   resource - the arvados resource on which to delete an item
-//   uuid - the item to delete
-//   parameters - method parameters
-//   output - a map or annotated struct which is a legal target for encoding/json/Decoder
-// return
-//   err - error accessing the resource, or nil if no error
-func (this ArvadosClient) Delete(resource string, uuid string, parameters Dict, output interface{}) (err error) {
-       return this.Call("DELETE", resource, uuid, "", parameters, output)
+// Delete a resource. See Call for argument descriptions.
+func (c ArvadosClient) Delete(resourceType string, uuid string, parameters Dict, output interface{}) (err error) {
+       return c.Call("DELETE", resourceType, uuid, "", parameters, output)
 }
 
-// Update fields of an instance of a resource.
-//
-//   resource - the arvados resource on which to update the item
-//   uuid - the item to update
-//   parameters - method parameters
-//   output - a map or annotated struct which is a legal target for encoding/json/Decoder
-// return
-//   err - error accessing the resource, or nil if no error
-func (this ArvadosClient) Update(resource string, uuid string, parameters Dict, output interface{}) (err error) {
-       return this.Call("PUT", resource, uuid, "", parameters, output)
+// Modify attributes of a resource. See Call for argument descriptions.
+func (c ArvadosClient) Update(resourceType string, uuid string, parameters Dict, output interface{}) (err error) {
+       return c.Call("PUT", resourceType, uuid, "", parameters, output)
 }
 
-// List the instances of a resource
-//
-//   resource - the arvados resource on which to list
-//   parameters - method parameters
-//   output - a map or annotated struct which is a legal target for encoding/json/Decoder
-// return
-//   err - error accessing the resource, or nil if no error
-func (this ArvadosClient) List(resource string, parameters Dict, output interface{}) (err error) {
-       return this.Call("GET", resource, "", "", parameters, output)
+// Get a resource. See Call for argument descriptions.
+func (c ArvadosClient) Get(resourceType string, uuid string, parameters Dict, output interface{}) (err error) {
+       if !UUIDMatch(uuid) && !(resourceType == "collections" && PDHMatch(uuid)) {
+               // No object has uuid == "": there is no need to make
+               // an API call. Furthermore, the HTTP request for such
+               // an API call would be "GET /arvados/v1/type/", which
+               // is liable to be misinterpreted as the List API.
+               return ErrInvalidArgument
+       }
+       return c.Call("GET", resourceType, uuid, "", parameters, output)
+}
+
+// List resources of a given type. See Call for argument descriptions.
+func (c ArvadosClient) List(resourceType string, parameters Dict, output interface{}) (err error) {
+       return c.Call("GET", resourceType, "", "", parameters, output)
+}
+
+const API_DISCOVERY_RESOURCE = "discovery/v1/apis/arvados/v1/rest"
+
+// Discovery returns the value of the given parameter in the discovery
+// document. Returns a non-nil error if the discovery document cannot
+// be retrieved/decoded. Returns ErrInvalidArgument if the requested
+// parameter is not found in the discovery document.
+func (c *ArvadosClient) Discovery(parameter string) (value interface{}, err error) {
+       if len(c.DiscoveryDoc) == 0 {
+               c.DiscoveryDoc = make(Dict)
+               err = c.Call("GET", API_DISCOVERY_RESOURCE, "", "", nil, &c.DiscoveryDoc)
+               if err != nil {
+                       return nil, err
+               }
+       }
+
+       var found bool
+       value, found = c.DiscoveryDoc[parameter]
+       if found {
+               return value, nil
+       } else {
+               return value, ErrInvalidArgument
+       }
 }
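
Editor's note: usage sketch — the first Discovery call fetches and caches the discovery document in DiscoveryDoc; later lookups are answered from the cache. Illustrative only:

package example

import (
	"fmt"

	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
)

func replication() {
	arv, err := arvadosclient.MakeArvadosClient()
	if err != nil {
		panic(err)
	}
	// First call hits GET /discovery/v1/apis/arvados/v1/rest and
	// caches the result; subsequent calls are in-memory lookups.
	val, err := arv.Discovery("defaultCollectionReplication")
	if err != nil {
		panic(err)
	}
	fmt.Println("defaultCollectionReplication =", val)
}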
index 1af964d0a045ad2b4bb0a6dd9610fcf11d8027d3..d35f6dacb72b632e18718b503d0a2f4ff55e7a17 100644 (file)
@@ -1,8 +1,8 @@
 package arvadosclient
 
 import (
-       . "gopkg.in/check.v1"
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       . "gopkg.in/check.v1"
        "net/http"
        "os"
        "testing"
@@ -14,6 +14,7 @@ func Test(t *testing.T) {
 }
 
 var _ = Suite(&ServerRequiredSuite{})
+var _ = Suite(&UnitSuite{})
 
 // Tests that require the Keep server running
 type ServerRequiredSuite struct{}
@@ -46,6 +47,58 @@ func (s *ServerRequiredSuite) TestMakeArvadosClientInsecure(c *C) {
        c.Check(kc.Client.Transport.(*http.Transport).TLSClientConfig.InsecureSkipVerify, Equals, true)
 }
 
+func (s *ServerRequiredSuite) TestGetInvalidUUID(c *C) {
+       arv, err := MakeArvadosClient()
+
+       getback := make(Dict)
+       err = arv.Get("collections", "", nil, &getback)
+       c.Assert(err, Equals, ErrInvalidArgument)
+       c.Assert(len(getback), Equals, 0)
+
+       err = arv.Get("collections", "zebra-moose-unicorn", nil, &getback)
+       c.Assert(err, Equals, ErrInvalidArgument)
+       c.Assert(len(getback), Equals, 0)
+
+       err = arv.Get("collections", "acbd18db4cc2f85cedef654fccc4a4d8", nil, &getback)
+       c.Assert(err, Equals, ErrInvalidArgument)
+       c.Assert(len(getback), Equals, 0)
+}
+
+func (s *ServerRequiredSuite) TestGetValidUUID(c *C) {
+       arv, err := MakeArvadosClient()
+
+       getback := make(Dict)
+       err = arv.Get("collections", "zzzzz-4zz18-abcdeabcdeabcde", nil, &getback)
+       c.Assert(err, FitsTypeOf, APIServerError{})
+       c.Assert(err.(APIServerError).HttpStatusCode, Equals, http.StatusNotFound)
+       c.Assert(len(getback), Equals, 0)
+
+       err = arv.Get("collections", "acbd18db4cc2f85cedef654fccc4a4d8+3", nil, &getback)
+       c.Assert(err, FitsTypeOf, APIServerError{})
+       c.Assert(err.(APIServerError).HttpStatusCode, Equals, http.StatusNotFound)
+       c.Assert(len(getback), Equals, 0)
+}
+
+func (s *ServerRequiredSuite) TestInvalidResourceType(c *C) {
+       arv, err := MakeArvadosClient()
+
+       getback := make(Dict)
+       err = arv.Get("unicorns", "zzzzz-zebra-unicorn7unicorn", nil, &getback)
+       c.Assert(err, FitsTypeOf, APIServerError{})
+       c.Assert(err.(APIServerError).HttpStatusCode, Equals, http.StatusNotFound)
+       c.Assert(len(getback), Equals, 0)
+
+       err = arv.Update("unicorns", "zzzzz-zebra-unicorn7unicorn", nil, &getback)
+       c.Assert(err, FitsTypeOf, APIServerError{})
+       c.Assert(err.(APIServerError).HttpStatusCode, Equals, http.StatusNotFound)
+       c.Assert(len(getback), Equals, 0)
+
+       err = arv.List("unicorns", nil, &getback)
+       c.Assert(err, FitsTypeOf, APIServerError{})
+       c.Assert(err.(APIServerError).HttpStatusCode, Equals, http.StatusNotFound)
+       c.Assert(len(getback), Equals, 0)
+}
+
 func (s *ServerRequiredSuite) TestCreatePipelineTemplate(c *C) {
        arv, err := MakeArvadosClient()
 
@@ -62,6 +115,13 @@ func (s *ServerRequiredSuite) TestCreatePipelineTemplate(c *C) {
        c.Assert(getback["components"].(map[string]interface{})["c2"].(map[string]interface{})["script"], Equals, "script2")
 
        uuid := getback["uuid"].(string)
+
+       getback = make(Dict)
+       err = arv.Get("pipeline_templates", uuid, nil, &getback)
+       c.Assert(err, Equals, nil)
+       c.Assert(getback["name"], Equals, "tmp")
+       c.Assert(getback["components"].(map[string]interface{})["c1"].(map[string]interface{})["script"], Equals, "script1")
+
        getback = make(Dict)
        err = arv.Update("pipeline_templates", uuid,
                Dict{
@@ -86,17 +146,63 @@ func (s *ServerRequiredSuite) TestErrorResponse(c *C) {
                err := arv.Create("logs",
                        Dict{"log": Dict{"bogus_attr": "foo"}},
                        &getback)
+               c.Assert(err, ErrorMatches, "arvados API server error: .*")
                c.Assert(err, ErrorMatches, ".*unknown attribute: bogus_attr.*")
-               c.Assert(err, FitsTypeOf, ArvadosApiError{})
-               c.Assert(err.(ArvadosApiError).HttpStatusCode, Equals, 422)
+               c.Assert(err, FitsTypeOf, APIServerError{})
+               c.Assert(err.(APIServerError).HttpStatusCode, Equals, 422)
        }
 
        {
                err := arv.Create("bogus",
                        Dict{"bogus": Dict{}},
                        &getback)
-               c.Assert(err, ErrorMatches, "Path not found")
-               c.Assert(err, FitsTypeOf, ArvadosApiError{})
-               c.Assert(err.(ArvadosApiError).HttpStatusCode, Equals, 404)
+               c.Assert(err, ErrorMatches, "arvados API server error: .*")
+               c.Assert(err, ErrorMatches, ".*Path not found.*")
+               c.Assert(err, FitsTypeOf, APIServerError{})
+               c.Assert(err.(APIServerError).HttpStatusCode, Equals, 404)
        }
 }
+
+func (s *ServerRequiredSuite) TestAPIDiscovery_Get_defaultCollectionReplication(c *C) {
+       arv, err := MakeArvadosClient()
+       value, err := arv.Discovery("defaultCollectionReplication")
+       c.Assert(err, IsNil)
+       c.Assert(value, NotNil)
+}
+
+func (s *ServerRequiredSuite) TestAPIDiscovery_Get_noSuchParameter(c *C) {
+       arv, err := MakeArvadosClient()
+       value, err := arv.Discovery("noSuchParameter")
+       c.Assert(err, NotNil)
+       c.Assert(value, IsNil)
+}
+
+type UnitSuite struct{}
+
+func (s *UnitSuite) TestUUIDMatch(c *C) {
+       c.Assert(UUIDMatch("zzzzz-tpzed-000000000000000"), Equals, true)
+       c.Assert(UUIDMatch("zzzzz-zebra-000000000000000"), Equals, true)
+       c.Assert(UUIDMatch("00000-00000-zzzzzzzzzzzzzzz"), Equals, true)
+       c.Assert(UUIDMatch("ZEBRA-HORSE-AFRICANELEPHANT"), Equals, false)
+       c.Assert(UUIDMatch(" zzzzz-tpzed-000000000000000"), Equals, false)
+       c.Assert(UUIDMatch("d41d8cd98f00b204e9800998ecf8427e"), Equals, false)
+       c.Assert(UUIDMatch("d41d8cd98f00b204e9800998ecf8427e+0"), Equals, false)
+       c.Assert(UUIDMatch(""), Equals, false)
+}
+
+func (s *UnitSuite) TestPDHMatch(c *C) {
+       c.Assert(PDHMatch("zzzzz-tpzed-000000000000000"), Equals, false)
+       c.Assert(PDHMatch("d41d8cd98f00b204e9800998ecf8427e"), Equals, false)
+       c.Assert(PDHMatch("d41d8cd98f00b204e9800998ecf8427e+0"), Equals, true)
+       c.Assert(PDHMatch("d41d8cd98f00b204e9800998ecf8427e+12345"), Equals, true)
+       c.Assert(PDHMatch("d41d8cd98f00b204e9800998ecf8427e 12345"), Equals, false)
+       c.Assert(PDHMatch("D41D8CD98F00B204E9800998ECF8427E+12345"), Equals, false)
+       c.Assert(PDHMatch("d41d8cd98f00b204e9800998ecf8427e+12345 "), Equals, false)
+       c.Assert(PDHMatch("d41d8cd98f00b204e9800998ecf8427e+abcdef"), Equals, false)
+       c.Assert(PDHMatch("da39a3ee5e6b4b0d3255bfef95601890afd80709"), Equals, false)
+       c.Assert(PDHMatch("da39a3ee5e6b4b0d3255bfef95601890afd80709+0"), Equals, false)
+       c.Assert(PDHMatch("d41d8cd98f00b204e9800998ecf8427+12345"), Equals, false)
+       c.Assert(PDHMatch("d41d8cd98f00b204e9800998ecf8427e+12345\n"), Equals, false)
+       c.Assert(PDHMatch("+12345"), Equals, false)
+       c.Assert(PDHMatch(""), Equals, false)
+}
diff --git a/sdk/go/arvadosclient/pool.go b/sdk/go/arvadosclient/pool.go
new file mode 100644 (file)
index 0000000..87b67c3
--- /dev/null
@@ -0,0 +1,52 @@
+package arvadosclient
+
+import (
+       "sync"
+)
+
+// A ClientPool is a pool of ArvadosClients. This is useful for
+// applications that make API calls using a dynamic set of tokens,
+// like web services that pass through their own clients'
+// credentials. See arvados-git-httpd for an example, and sync.Pool
+// for more information about garbage collection.
+type ClientPool struct {
+       sync.Pool
+       lastErr error
+}
+
+// MakeClientPool returns a new empty ClientPool.
+func MakeClientPool() *ClientPool {
+       p := &ClientPool{}
+       p.Pool = sync.Pool{New: func() interface{} {
+               arv, err := MakeArvadosClient()
+               if err != nil {
+                       p.lastErr = err
+                       return nil
+               }
+               return &arv
+       }}
+       return p
+}
+
+// Err returns the error that was encountered last time Get returned
+// nil.
+func (p *ClientPool) Err() error {
+       return p.lastErr
+}
+
+// Get returns an ArvadosClient taken from the pool, or a new one if
+// the pool is empty. If an existing client is returned, its state
+// (including its ApiToken) will be just as it was when it was Put
+// back in the pool.
+func (p *ClientPool) Get() *ArvadosClient {
+       c, ok := p.Pool.Get().(*ArvadosClient)
+       if !ok {
+               return nil
+       }
+       return c
+}
+
+// Put puts an ArvadosClient back in the pool.
+func (p *ClientPool) Put(c *ArvadosClient) {
+       p.Pool.Put(c)
+}
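
Editor's note: a usage sketch for ClientPool in the pass-through-credentials scenario the type comment describes (the token header name is hypothetical; arvados-git-httpd is the real example):

package example

import (
	"net/http"

	"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
)

var pool = arvadosclient.MakeClientPool()

func handler(w http.ResponseWriter, r *http.Request) {
	arv := pool.Get()
	if arv == nil {
		http.Error(w, "pool error: "+pool.Err().Error(), http.StatusInternalServerError)
		return
	}
	defer pool.Put(arv)
	// Pooled clients keep their previous state, so replace the token
	// with this request's credentials before making any calls.
	arv.ApiToken = r.Header.Get("X-Api-Token") // hypothetical header
	// ... use arv.Call(...) on the caller's behalf here ...
}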
diff --git a/sdk/go/auth/auth.go b/sdk/go/auth/auth.go
new file mode 100644 (file)
index 0000000..4a719e9
--- /dev/null
@@ -0,0 +1,61 @@
+package auth
+
+import (
+       "net/http"
+       "net/url"
+       "strings"
+)
+
+type Credentials struct {
+       Tokens []string
+}
+
+func NewCredentials() *Credentials {
+       return &Credentials{Tokens: []string{}}
+}
+
+func NewCredentialsFromHTTPRequest(r *http.Request) *Credentials {
+       c := NewCredentials()
+       c.LoadTokensFromHTTPRequest(r)
+       return c
+}
+
+// LoadTokensFromHTTPRequest loads all tokens it can find in the
+// headers and query string of an HTTP request.
+func (a *Credentials) LoadTokensFromHTTPRequest(r *http.Request) {
+       // Load plain token from "Authorization: OAuth2 ..." header
+       // (typically used by smart API clients)
+       if toks := strings.SplitN(r.Header.Get("Authorization"), " ", 2); len(toks) == 2 && toks[0] == "OAuth2" {
+               a.Tokens = append(a.Tokens, toks[1])
+       }
+
+       // Load base64-encoded token from "Authorization: Basic ..."
+       // header (typically used by git via credential helper)
+       if _, password, ok := BasicAuth(r); ok {
+               a.Tokens = append(a.Tokens, password)
+       }
+
+       // Load tokens from query string. It's generally not a good
+       // idea to pass tokens around this way, but passing a narrowly
+       // scoped token is a reasonable way to implement "secret link
+       // to an object" in a generic way.
+       //
+       // ParseQuery always returns a non-nil map which might have
+       // valid parameters, even when a decoding error causes it to
+       // return a non-nil err. We ignore err; hopefully the caller
+       // will also need to parse the query string for
+       // application-specific purposes and will therefore
+       // find/report decoding errors in a suitable way.
+       qvalues, _ := url.ParseQuery(r.URL.RawQuery)
+       if val, ok := qvalues["api_token"]; ok {
+               a.Tokens = append(a.Tokens, val...)
+       }
+
+       // TODO: Load token from Rails session cookie (if Rails site
+       // secret is known)
+}
+
+// TODO: LoadTokensFromHTTPRequestBody(). We can't assume in
+// LoadTokensFromHTTPRequest() that [or how] we should read and parse
+// the request body. This has to be requested explicitly by the
+// application.
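
A sketch of the intended call pattern for this package (hypothetical helper, not part of this commit; assumes the auth and net/http imports):

    // tokensForRequest collects every credential the request carried;
    // callers would try them in order against the API server.
    func tokensForRequest(r *http.Request) []string {
        creds := auth.NewCredentialsFromHTTPRequest(r)
        // creds.Tokens now holds any "Authorization: OAuth2" token,
        // Basic-auth password, and api_token query values, in that order.
        return creds.Tokens
    }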
diff --git a/sdk/go/auth/basic_auth_go13.go b/sdk/go/auth/basic_auth_go13.go
new file mode 100644 (file)
index 0000000..c0fe5fc
--- /dev/null
@@ -0,0 +1,28 @@
+// +build !go1.4
+
+package auth
+
+import (
+       "encoding/base64"
+       "net/http"
+       "strings"
+)
+
+func BasicAuth(r *http.Request) (username, password string, ok bool) {
+       tokens := strings.SplitN(r.Header.Get("Authorization"), " ", 2)
+       if len(tokens) != 2 || tokens[0] != "Basic" {
+               return "", "", false
+       }
+
+       decoded, err := base64.StdEncoding.DecodeString(tokens[1])
+       if err != nil {
+               return "", "", false
+       }
+
+       userAndPass := strings.SplitN(string(decoded), ":", 2)
+       if len(userAndPass) != 2 {
+               return "", "", false
+       }
+
+       return userAndPass[0], userAndPass[1], true
+}
diff --git a/sdk/go/auth/basic_auth_go14.go b/sdk/go/auth/basic_auth_go14.go
new file mode 100644 (file)
index 0000000..aeedb06
--- /dev/null
@@ -0,0 +1,11 @@
+// +build go1.4
+
+package auth
+
+import (
+       "net/http"
+)
+
+func BasicAuth(r *http.Request) (username, password string, ok bool) {
+       return r.BasicAuth()
+}
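
(*http.Request).BasicAuth() was added to the standard library in Go 1.4, which is why the two files above are split by release build constraints: the hand-rolled decoder compiles only under !go1.4, and the thin wrapper only under go1.4. Callers see a single BasicAuth either way; a sketch of the credential-helper case mentioned in auth.go (req and tokens are assumed):

    // Hypothetical sketch: git's credential helper sends the API token
    // as the Basic-auth password, so the username is ignored.
    if _, password, ok := auth.BasicAuth(req); ok {
        tokens = append(tokens, password)
    }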
diff --git a/sdk/go/auth/basic_auth_test.go b/sdk/go/auth/basic_auth_test.go
new file mode 100644 (file)
index 0000000..935f696
--- /dev/null
@@ -0,0 +1,30 @@
+package auth
+
+import (
+       "net/http"
+       "testing"
+)
+
+type basicAuthTestCase struct {
+       hdr  string
+       user string
+       pass string
+       ok   bool
+}
+
+func TestBasicAuth(t *testing.T) {
+       tests := []basicAuthTestCase{
+               {"Basic Zm9vOmJhcg==", "foo", "bar", true},
+               {"Bogus Zm9vOmJhcg==", "", "", false},
+               {"Zm9vOmJhcg==", "", "", false},
+               {"Basic", "", "", false},
+               {"", "", "", false},
+       }
+       for _, test := range tests {
+               if u, p, ok := BasicAuth(&http.Request{Header: map[string][]string{
+                       "Authorization": {test.hdr},
+               }}); u != test.user || p != test.pass || ok != test.ok {
+                       t.Error("got:", u, p, ok, "expected:", test.user, test.pass, test.ok, "from:", test.hdr)
+               }
+       }
+}
diff --git a/sdk/go/blockdigest/blockdigest.go b/sdk/go/blockdigest/blockdigest.go
index 9b818d365303ac6805c15be534400a0c3c854448..d2f1c60ba93889614de18756dcf3ed6608f0b5fa 100644 (file)
@@ -1,22 +1,41 @@
-/* Stores a Block Locator Digest compactly. Can be used as a map key. */
-
+// Stores a Block Locator Digest compactly. Can be used as a map key.
 package blockdigest
 
 import (
        "fmt"
        "log"
+       "regexp"
        "strconv"
+       "strings"
 )
 
+var LocatorPattern = regexp.MustCompile(
+       "^[0-9a-fA-F]{32}\\+[0-9]+(\\+[A-Z][A-Za-z0-9@_-]+)*$")
+
 // Stores a Block Locator Digest compactly, up to 128 bits.
 // Can be used as a map key.
 type BlockDigest struct {
-       h uint64
-       l uint64
+       H uint64
+       L uint64
+}
+
+type DigestWithSize struct {
+       Digest BlockDigest
+       Size   uint32
+}
+
+type BlockLocator struct {
+       Digest BlockDigest
+       Size   int
+       Hints  []string
 }
 
 func (d BlockDigest) String() string {
-       return fmt.Sprintf("%016x%016x", d.h, d.l)
+       return fmt.Sprintf("%016x%016x", d.H, d.L)
+}
+
+func (w DigestWithSize) String() string {
+       return fmt.Sprintf("%s+%d", w.Digest.String(), w.Size)
 }
 
 // Will create a new BlockDigest unless an error is encountered.
@@ -27,11 +46,11 @@ func FromString(s string) (dig BlockDigest, err error) {
        }
 
        var d BlockDigest
-       d.h, err = strconv.ParseUint(s[:16], 16, 64)
+       d.H, err = strconv.ParseUint(s[:16], 16, 64)
        if err != nil {
                return
        }
-       d.l, err = strconv.ParseUint(s[16:], 16, 64)
+       d.L, err = strconv.ParseUint(s[16:], 16, 64)
        if err != nil {
                return
        }
@@ -47,3 +66,34 @@ func AssertFromString(s string) BlockDigest {
        }
        return d
 }
+
+func IsBlockLocator(s string) bool {
+       return LocatorPattern.MatchString(s)
+}
+
+func ParseBlockLocator(s string) (b BlockLocator, err error) {
+       if !LocatorPattern.MatchString(s) {
+               err = fmt.Errorf("String \"%s\" does not match BlockLocator pattern "+
+                       "\"%s\".",
+                       s,
+                       LocatorPattern.String())
+       } else {
+               tokens := strings.Split(s, "+")
+               var blockSize int64
+               var blockDigest BlockDigest
+               // We expect both of the following to succeed since LocatorPattern
+               // restricts the strings appropriately.
+               blockDigest, err = FromString(tokens[0])
+               if err != nil {
+                       return
+               }
+               blockSize, err = strconv.ParseInt(tokens[1], 10, 0)
+               if err != nil {
+                       return
+               }
+               b.Digest = blockDigest
+               b.Size = int(blockSize)
+               b.Hints = tokens[2:]
+       }
+       return
+}
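
A sketch of ParseBlockLocator on a locator borrowed from the test suite (assumes the blockdigest, fmt, and log imports; not part of this commit):

    loc, err := blockdigest.ParseBlockLocator("365f83f5f808896ec834c8b595288735+2310+K@qr1hi")
    if err != nil {
        log.Fatal(err)
    }
    // The digest stringifies back to the 32-hex form; Size and Hints
    // come from the remaining "+"-separated tokens.
    fmt.Println(loc.Digest, loc.Size, loc.Hints)
    // Output: 365f83f5f808896ec834c8b595288735 2310 [K@qr1hi]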
diff --git a/sdk/go/blockdigest/blockdigest_test.go b/sdk/go/blockdigest/blockdigest_test.go
index 068a1385aef5e4f714e5282235da621fb4e970b4..017aaa47101c9a2e1971e7694abac24bc77f1e06 100644 (file)
@@ -2,10 +2,37 @@ package blockdigest
 
 import (
        "fmt"
+       "runtime"
        "strings"
        "testing"
 )
 
+func getStackTrace() string {
+       buf := make([]byte, 1000)
+       bytes_written := runtime.Stack(buf, false)
+       return "Stack Trace:\n" + string(buf[:bytes_written])
+}
+
+func expectEqual(t *testing.T, actual interface{}, expected interface{}) {
+       if actual != expected {
+               t.Fatalf("Expected %v but received %v instead. %s",
+                       expected,
+                       actual,
+                       getStackTrace())
+       }
+}
+
+func expectStringSlicesEqual(t *testing.T, actual []string, expected []string) {
+       if len(actual) != len(expected) {
+               t.Fatalf("Expected %v (length %d), but received %v (length %d) instead. %s", expected, len(expected), actual, len(actual), getStackTrace())
+       }
+       for i := range actual {
+               if actual[i] != expected[i] {
+                       t.Fatalf("Expected %v but received %v instead (first disagreement at position %d). %s", expected, actual, i, getStackTrace())
+               }
+       }
+}
+
 func expectValidDigestString(t *testing.T, s string) {
        bd, err := FromString(s)
        if err != nil {
@@ -13,7 +40,7 @@ func expectValidDigestString(t *testing.T, s string) {
        }
 
        expected := strings.ToLower(s)
-               
+
        if expected != bd.String() {
                t.Fatalf("Expected %s to be returned by FromString(%s).String() but instead we received %s", expected, s, bd.String())
        }
@@ -26,6 +53,26 @@ func expectInvalidDigestString(t *testing.T, s string) {
        }
 }
 
+func expectBlockLocator(t *testing.T, actual BlockLocator, expected BlockLocator) {
+       expectEqual(t, actual.Digest, expected.Digest)
+       expectEqual(t, actual.Size, expected.Size)
+       expectStringSlicesEqual(t, actual.Hints, expected.Hints)
+}
+
+func expectLocatorPatternMatch(t *testing.T, s string) {
+       if !LocatorPattern.MatchString(s) {
+               t.Fatalf("Expected \"%s\" to match locator pattern but it did not.",
+                       s)
+       }
+}
+
+func expectLocatorPatternFail(t *testing.T, s string) {
+       if LocatorPattern.MatchString(s) {
+               t.Fatalf("Expected \"%s\" to fail locator pattern but it passed.",
+                       s)
+       }
+}
+
 func TestValidDigestStrings(t *testing.T) {
        expectValidDigestString(t, "01234567890123456789abcdefabcdef")
        expectValidDigestString(t, "01234567890123456789ABCDEFABCDEF")
@@ -49,7 +96,7 @@ func TestBlockDigestGetsPrettyPrintedByPrintf(t *testing.T) {
        input := "01234567890123456789abcdefabcdef"
        prettyPrinted := fmt.Sprintf("%v", AssertFromString(input))
        if prettyPrinted != input {
-               t.Fatalf("Expected blockDigest produced from \"%s\" to be printed as " +
+               t.Fatalf("Expected blockDigest produced from \"%s\" to be printed as "+
                        "\"%s\", but instead it was printed as %s",
                        input, input, prettyPrinted)
        }
@@ -58,13 +105,13 @@ func TestBlockDigestGetsPrettyPrintedByPrintf(t *testing.T) {
 func TestBlockDigestGetsPrettyPrintedByPrintfInNestedStructs(t *testing.T) {
        input := "01234567890123456789abcdefabcdef"
        value := 42
-       nested := struct{
+       nested := struct {
                // Fun trivia fact: If this field was called "digest" instead of
                // "Digest", then it would not be exported and String() would
                // never get called on it and our output would look very
                // different.
                Digest BlockDigest
-               value int
+               value  int
        }{
                AssertFromString(input),
                value,
@@ -72,8 +119,44 @@ func TestBlockDigestGetsPrettyPrintedByPrintfInNestedStructs(t *testing.T) {
        prettyPrinted := fmt.Sprintf("%+v", nested)
        expected := fmt.Sprintf("{Digest:%s value:%d}", input, value)
        if prettyPrinted != expected {
-               t.Fatalf("Expected blockDigest produced from \"%s\" to be printed as " +
+               t.Fatalf("Expected blockDigest produced from \"%s\" to be printed as "+
                        "\"%s\", but instead it was printed as %s",
                        input, expected, prettyPrinted)
        }
 }
+
+func TestLocatorPatternBasic(t *testing.T) {
+       expectLocatorPatternMatch(t, "12345678901234567890123456789012+12345")
+       expectLocatorPatternMatch(t, "A2345678901234abcdefababdeffdfdf+12345")
+       expectLocatorPatternMatch(t, "12345678901234567890123456789012+12345+A1")
+       expectLocatorPatternMatch(t,
+               "12345678901234567890123456789012+12345+A1+B123wxyz@_-")
+       expectLocatorPatternMatch(t,
+               "12345678901234567890123456789012+12345+A1+B123wxyz@_-+C@")
+
+       expectLocatorPatternFail(t, "12345678901234567890123456789012")
+       expectLocatorPatternFail(t, "12345678901234567890123456789012+")
+       expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+")
+       expectLocatorPatternFail(t, "1234567890123456789012345678901+12345")
+       expectLocatorPatternFail(t, "123456789012345678901234567890123+12345")
+       expectLocatorPatternFail(t, "g2345678901234abcdefababdeffdfdf+12345")
+       expectLocatorPatternFail(t, "12345678901234567890123456789012+12345 ")
+       expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+1")
+       expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+1A")
+       expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+A")
+       expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+a1")
+       expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+A1+")
+       expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+A1+B")
+       expectLocatorPatternFail(t, "12345678901234567890123456789012+12345+A+B2")
+}
+
+func TestParseBlockLocatorSimple(t *testing.T) {
+       b, err := ParseBlockLocator("365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf")
+       if err != nil {
+               t.Fatalf("Unexpected error parsing block locator: %v", err)
+       }
+       expectBlockLocator(t, b, BlockLocator{Digest: AssertFromString("365f83f5f808896ec834c8b595288735"),
+               Size: 2310,
+               Hints: []string{"K@qr1hi",
+                       "Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"}})
+}
diff --git a/sdk/go/blockdigest/testing.go b/sdk/go/blockdigest/testing.go
new file mode 100644 (file)
index 0000000..40f08ce
--- /dev/null
@@ -0,0 +1,16 @@
+// Code used for testing only.
+
+package blockdigest
+
+// Just used for testing when we need some distinct BlockDigests
+func MakeTestBlockDigest(i int) BlockDigest {
+       return BlockDigest{L: uint64(i)}
+}
+
+func MakeTestDigestSpecifySize(i int, s int) DigestWithSize {
+       return DigestWithSize{Digest: BlockDigest{L: uint64(i)}, Size: uint32(s)}
+}
+
+func MakeTestDigestWithSize(i int) DigestWithSize {
+       return MakeTestDigestSpecifySize(i, i)
+}
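
These helpers let tests fabricate distinct digests without hashing real data; a sketch of the resulting values (assumes the blockdigest and fmt imports):

    d := blockdigest.MakeTestBlockDigest(42)
    fmt.Println(d) // 0000000000000000000000000000002a
    ws := blockdigest.MakeTestDigestWithSize(7)
    fmt.Println(ws) // 00000000000000000000000000000007+7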
diff --git a/sdk/go/httpserver/httpserver.go b/sdk/go/httpserver/httpserver.go
new file mode 100644 (file)
index 0000000..396fe42
--- /dev/null
@@ -0,0 +1,87 @@
+package httpserver
+
+import (
+       "net"
+       "net/http"
+       "sync"
+       "time"
+)
+
+type Server struct {
+       http.Server
+       Addr     string // host:port where the server is listening.
+       err      error
+       cond     *sync.Cond
+       running  bool
+       listener *net.TCPListener
+       wantDown bool
+}
+
+// Start is essentially (*http.Server).ListenAndServe() with two more
+// features: (1) by the time Start() returns, Addr is changed to the
+// address:port we ended up listening on -- which makes listening on
+// ":0" useful in test suites -- and (2) the server can be shut down
+// without killing the process -- which is useful in test cases, and
+// makes it possible to shut down gracefully on SIGTERM without
+// killing active connections.
+func (srv *Server) Start() error {
+       addr, err := net.ResolveTCPAddr("tcp", srv.Addr)
+       if err != nil {
+               return err
+       }
+       srv.listener, err = net.ListenTCP("tcp", addr)
+       if err != nil {
+               return err
+       }
+       srv.Addr = srv.listener.Addr().String()
+
+       mutex := &sync.RWMutex{}
+       srv.cond = sync.NewCond(mutex.RLocker())
+       srv.running = true
+       go func() {
+               err = srv.Serve(tcpKeepAliveListener{srv.listener})
+               if !srv.wantDown {
+                       srv.err = err
+               }
+               mutex.Lock()
+               srv.running = false
+               srv.cond.Broadcast()
+               mutex.Unlock()
+       }()
+       return nil
+}
+
+// Close shuts down the server and returns when it has stopped.
+func (srv *Server) Close() error {
+       srv.wantDown = true
+       srv.listener.Close()
+       return srv.Wait()
+}
+
+// Wait returns when the server has shut down.
+func (srv *Server) Wait() error {
+       if srv.cond == nil {
+               return nil
+       }
+       srv.cond.L.Lock()
+       defer srv.cond.L.Unlock()
+       for srv.running {
+               srv.cond.Wait()
+       }
+       return srv.err
+}
+
+// tcpKeepAliveListener is copied from net/http, where it is not exported.
+type tcpKeepAliveListener struct {
+       *net.TCPListener
+}
+
+func (ln tcpKeepAliveListener) Accept() (c net.Conn, err error) {
+       tc, err := ln.AcceptTCP()
+       if err != nil {
+               return
+       }
+       tc.SetKeepAlive(true)
+       tc.SetKeepAlivePeriod(3 * time.Minute)
+       return tc, nil
+}
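
A minimal test-suite sketch using the new Server (handler and request are illustrative; assumes the httpserver, net/http, and log imports):

    srv := &httpserver.Server{}
    srv.Addr = ":0" // ask the kernel for any free port
    srv.Handler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
        w.Write([]byte("ok"))
    })
    if err := srv.Start(); err != nil {
        log.Fatal(err)
    }
    // srv.Addr has been rewritten to the actual host:port.
    resp, err := http.Get("http://" + srv.Addr + "/")
    if err != nil {
        log.Fatal(err)
    }
    resp.Body.Close()
    srv.Close() // stop listening and wait for Serve() to return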
diff --git a/sdk/go/httpserver/log.go b/sdk/go/httpserver/log.go
new file mode 100644 (file)
index 0000000..cdfc595
--- /dev/null
@@ -0,0 +1,24 @@
+package httpserver
+
+import (
+       "fmt"
+       "log"
+)
+
+// Log calls log.Println but first transforms strings so they are
+// safer to write in logs (e.g., 'foo"bar' becomes
+// '"foo\"bar"'). Arguments that aren't strings and don't have a
+// (String() string) method are left alone.
+func Log(args ...interface{}) {
+       newargs := make([]interface{}, len(args))
+       for i, arg := range args {
+               if s, ok := arg.(string); ok {
+                       newargs[i] = fmt.Sprintf("%+q", s)
+               } else if s, ok := arg.(fmt.Stringer); ok {
+                       newargs[i] = fmt.Sprintf("%+q", s.String())
+               } else {
+                       newargs[i] = arg
+               }
+       }
+       log.Println(newargs...)
+}
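
A sketch of the quoting behavior (output shown modulo log's timestamp prefix):

    httpserver.Log("GET", `/path with "quotes"`, 200)
    // -> "GET" "/path with \"quotes\"" 200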
diff --git a/sdk/go/httpserver/responsewriter.go b/sdk/go/httpserver/responsewriter.go
new file mode 100644 (file)
index 0000000..1af4dc8
--- /dev/null
@@ -0,0 +1,43 @@
+package httpserver
+
+import (
+       "net/http"
+)
+
+// ResponseWriter wraps http.ResponseWriter and exposes the status
+// sent, the number of bytes sent to the client, and the last write
+// error.
+type ResponseWriter struct {
+       http.ResponseWriter
+       wroteStatus    *int   // Last status given to WriteHeader()
+       wroteBodyBytes *int   // Bytes successfully written
+       err            *error // Last error returned from Write()
+}
+
+func WrapResponseWriter(orig http.ResponseWriter) ResponseWriter {
+       return ResponseWriter{orig, new(int), new(int), new(error)}
+}
+
+func (w ResponseWriter) WriteHeader(s int) {
+       *w.wroteStatus = s
+       w.ResponseWriter.WriteHeader(s)
+}
+
+func (w ResponseWriter) Write(data []byte) (n int, err error) {
+       n, err = w.ResponseWriter.Write(data)
+       *w.wroteBodyBytes += n
+       *w.err = err
+       return
+}
+
+func (w ResponseWriter) WroteStatus() int {
+       return *w.wroteStatus
+}
+
+func (w ResponseWriter) WroteBodyBytes() int {
+       return *w.wroteBodyBytes
+}
+
+func (w ResponseWriter) Err() error {
+       return *w.err
+}
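
A hypothetical logging middleware built on this wrapper (not part of this commit), combining it with Log above:

    func logged(h http.Handler) http.Handler {
        return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
            resp := httpserver.WrapResponseWriter(w)
            h.ServeHTTP(resp, r)
            httpserver.Log(r.RemoteAddr, r.Method, r.URL.Path,
                resp.WroteStatus(), resp.WroteBodyBytes())
        })
    }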
diff --git a/sdk/go/keepclient/hashcheck.go b/sdk/go/keepclient/hashcheck.go
index 1f696d95b64d60f69c772063be7ae5b60e671cca..1706134757fae14a6cc0f0ccecc1a7b6a46a3002 100644 (file)
@@ -1,8 +1,3 @@
-// Lightweight implementation of io.ReadCloser that checks the contents read
-// from the underlying io.Reader a against checksum hash.  To avoid reading the
-// entire contents into a buffer up front, the hash is updated with each read,
-// and the actual checksum is not checked until the underlying reader returns
-// EOF.
 package keepclient
 
 import (
@@ -14,20 +9,22 @@ import (
 
 var BadChecksum = errors.New("Reader failed checksum")
 
+// HashCheckingReader is an io.ReadCloser that checks the contents
+// read from the underlying io.Reader against the provided hash.
 type HashCheckingReader struct {
        // The underlying data source
        io.Reader
 
-       // The hashing function to use
+       // The hash function to use
        hash.Hash
 
        // The hash value to check against.  Must be a hex-encoded lowercase string.
        Check string
 }
 
-// Read from the underlying reader, update the hashing function, and pass the
-// results through.  Will return BadChecksum on the last read instead of EOF if
-// the checksum doesn't match.
+// Read reads from the underlying reader, updates the hash, and
+// passes the results through. It returns BadChecksum (instead of
+// EOF) on the last read if the checksum doesn't match.
 func (this HashCheckingReader) Read(p []byte) (n int, err error) {
        n, err = this.Reader.Read(p)
        if n > 0 {
@@ -42,8 +39,8 @@ func (this HashCheckingReader) Read(p []byte) (n int, err error) {
        return n, err
 }
 
-// Write entire contents of this.Reader to 'dest'.  Returns BadChecksum if the
-// data written to 'dest' doesn't match the hash code of this.Check.
+// WriteTo writes the entire contents of this.Reader to dest.  Returns
+// BadChecksum if the checksum doesn't match.
 func (this HashCheckingReader) WriteTo(dest io.Writer) (written int64, err error) {
        if writeto, ok := this.Reader.(io.WriterTo); ok {
                written, err = writeto.WriteTo(io.MultiWriter(dest, this.Hash))
@@ -60,8 +57,9 @@ func (this HashCheckingReader) WriteTo(dest io.Writer) (written int64, err error
        return written, err
 }
 
-// Close() the underlying Reader if it is castable to io.ReadCloser.  This will
-// drain the underlying reader of any remaining data and check the checksum.
+// Close reads all remaining data from the underlying Reader and
+// returns BadChecksum if the checksum doesn't match. It also closes
+// the underlying Reader if it implements io.ReadCloser.
 func (this HashCheckingReader) Close() (err error) {
        _, err = io.Copy(this.Hash, this.Reader)
 
diff --git a/sdk/go/keepclient/keepclient.go b/sdk/go/keepclient/keepclient.go
index 5d791948dcb808f3373555d183d61f7df5a22100..f82e5c7c594062f23da7ab42db3c4971738d5597 100644 (file)
@@ -14,11 +14,9 @@ import (
        "net/http"
        "os"
        "regexp"
+       "strconv"
        "strings"
        "sync"
-       "sync/atomic"
-       "time"
-       "unsafe"
 )
 
 // A Keep "block" is 64MB.
@@ -26,9 +24,10 @@ const BLOCKSIZE = 64 * 1024 * 1024
 
 var BlockNotFound = errors.New("Block not found")
 var InsufficientReplicasError = errors.New("Could not write sufficient replicas")
-var OversizeBlockError = errors.New("Block too big")
+var OversizeBlockError = errors.New("Exceeded maximum block size (" + strconv.Itoa(BLOCKSIZE) + ")")
 var MissingArvadosApiHost = errors.New("Missing required environment variable ARVADOS_API_HOST")
 var MissingArvadosApiToken = errors.New("Missing required environment variable ARVADOS_API_TOKEN")
+var InvalidLocatorError = errors.New("Invalid locator")
 
 const X_Keep_Desired_Replicas = "X-Keep-Desired-Replicas"
 const X_Keep_Replicas_Stored = "X-Keep-Replicas-Stored"
@@ -38,42 +37,44 @@ type KeepClient struct {
        Arvados       *arvadosclient.ArvadosClient
        Want_replicas int
        Using_proxy   bool
-       service_roots *map[string]string
-       lock          sync.Mutex
+       localRoots    *map[string]string
+       writableLocalRoots *map[string]string
+       gatewayRoots  *map[string]string
+       lock          sync.RWMutex
        Client        *http.Client
 }
 
 // Create a new KeepClient.  This will contact the API server to discover Keep
 // servers.
-func MakeKeepClient(arv *arvadosclient.ArvadosClient) (kc KeepClient, err error) {
+func MakeKeepClient(arv *arvadosclient.ArvadosClient) (*KeepClient, error) {
        var matchTrue = regexp.MustCompile("^(?i:1|yes|true)$")
        insecure := matchTrue.MatchString(os.Getenv("ARVADOS_API_HOST_INSECURE"))
-       kc = KeepClient{
+       kc := &KeepClient{
                Arvados:       arv,
                Want_replicas: 2,
                Using_proxy:   false,
                Client: &http.Client{Transport: &http.Transport{
                        TLSClientConfig: &tls.Config{InsecureSkipVerify: insecure}}},
        }
-       _, err = (&kc).DiscoverKeepServers()
-
-       return kc, err
+       return kc, kc.DiscoverKeepServers()
 }
 
-// Put a block given the block hash, a reader with the block data, and the
-// expected length of that data.  The desired number of replicas is given in
-// KeepClient.Want_replicas.  Returns the number of replicas that were written
-// and if there was an error.  Note this will return InsufficientReplias
-// whenever 0 <= replicas < this.Wants_replicas.
-func (this KeepClient) PutHR(hash string, r io.Reader, expectedLength int64) (locator string, replicas int, err error) {
-
+// PutHR puts a block given its hash, a reader, and the number of
+// bytes to read from the reader (which must be between 0 and BLOCKSIZE).
+//
+// Returns the locator for the written block, the number of replicas
+// written, and an error.
+//
+// Returns an InsufficientReplicasError if 0 <= replicas <
+// kc.Want_replicas.
+func (kc *KeepClient) PutHR(hash string, r io.Reader, dataBytes int64) (string, int, error) {
        // Buffer for reads from 'r'
        var bufsize int
-       if expectedLength > 0 {
-               if expectedLength > BLOCKSIZE {
+       if dataBytes > 0 {
+               if dataBytes > BLOCKSIZE {
                        return "", 0, OversizeBlockError
                }
-               bufsize = int(expectedLength)
+               bufsize = int(dataBytes)
        } else {
                bufsize = BLOCKSIZE
        }
@@ -81,215 +82,216 @@ func (this KeepClient) PutHR(hash string, r io.Reader, expectedLength int64) (lo
        t := streamer.AsyncStreamFromReader(bufsize, HashCheckingReader{r, md5.New(), hash})
        defer t.Close()
 
-       return this.putReplicas(hash, t, expectedLength)
+       return kc.putReplicas(hash, t, dataBytes)
 }
 
-// Put a block given the block hash and a byte buffer.  The desired number of
-// replicas is given in KeepClient.Want_replicas.  Returns the number of
-// replicas that were written and if there was an error.  Note this will return
-// InsufficientReplias whenever 0 <= replicas < this.Wants_replicas.
-func (this KeepClient) PutHB(hash string, buf []byte) (locator string, replicas int, err error) {
+// PutHB writes a block to Keep. The hash of the bytes is given in
+// hash, and the data is given in buf.
+//
+// Return values are the same as for PutHR.
+func (kc *KeepClient) PutHB(hash string, buf []byte) (string, int, error) {
        t := streamer.AsyncStreamFromSlice(buf)
        defer t.Close()
-
-       return this.putReplicas(hash, t, int64(len(buf)))
+       return kc.putReplicas(hash, t, int64(len(buf)))
 }
 
-// Put a block given a buffer.  The hash will be computed.  The desired number
-// of replicas is given in KeepClient.Want_replicas.  Returns the number of
-// replicas that were written and if there was an error.  Note this will return
-// InsufficientReplias whenever 0 <= replicas < this.Wants_replicas.
-func (this KeepClient) PutB(buffer []byte) (locator string, replicas int, err error) {
+// PutB writes a block to Keep. It computes the hash itself.
+//
+// Return values are the same as for PutHR.
+func (kc *KeepClient) PutB(buffer []byte) (string, int, error) {
        hash := fmt.Sprintf("%x", md5.Sum(buffer))
-       return this.PutHB(hash, buffer)
+       return kc.PutHB(hash, buffer)
 }
 
-// Put a block, given a Reader.  This will read the entire reader into a buffer
-// to compute the hash.  The desired number of replicas is given in
-// KeepClient.Want_replicas.  Returns the number of replicas that were written
-// and if there was an error.  Note this will return InsufficientReplias
-// whenever 0 <= replicas < this.Wants_replicas.  Also nhote that if the block
-// hash and data size are available, PutHR() is more efficient.
-func (this KeepClient) PutR(r io.Reader) (locator string, replicas int, err error) {
+// PutR writes a block to Keep. It first reads all data from r into a buffer
+// in order to compute the hash.
+//
+// Return values are the same as for PutHR.
+//
+// If the block hash and data size are known, PutHR is more efficient.
+func (kc *KeepClient) PutR(r io.Reader) (locator string, replicas int, err error) {
        if buffer, err := ioutil.ReadAll(r); err != nil {
                return "", 0, err
        } else {
-               return this.PutB(buffer)
+               return kc.PutB(buffer)
        }
 }
 
-// Get a block given a hash.  Return a reader, the expected data length, the
-// URL the block was fetched from, and if there was an error.  If the block
-// checksum does not match, the final Read() on the reader returned by this
-// method will return a BadChecksum error instead of EOF.
-func (this KeepClient) Get(hash string) (reader io.ReadCloser,
-       contentLength int64, url string, err error) {
-       return this.AuthorizedGet(hash, "", "")
-}
-
-// Get a block given a hash, with additional authorization provided by
-// signature and timestamp.  Return a reader, the expected data length, the URL
-// the block was fetched from, and if there was an error.  If the block
-// checksum does not match, the final Read() on the reader returned by this
-// method will return a BadChecksum error instead of EOF.
-func (this KeepClient) AuthorizedGet(hash string,
-       signature string,
-       timestamp string) (reader io.ReadCloser,
-       contentLength int64, url string, err error) {
-
-       // Take the hash of locator and timestamp in order to identify this
-       // specific transaction in log statements.
-       requestId := fmt.Sprintf("%x", md5.Sum([]byte(hash+time.Now().String())))[0:8]
-
-       // Calculate the ordering for asking servers
-       sv := NewRootSorter(this.ServiceRoots(), hash).GetSortedRoots()
-
-       for _, host := range sv {
-               var req *http.Request
-               var err error
-               var url string
-               if signature != "" {
-                       url = fmt.Sprintf("%s/%s+A%s@%s", host, hash,
-                               signature, timestamp)
-               } else {
-                       url = fmt.Sprintf("%s/%s", host, hash)
-               }
-               if req, err = http.NewRequest("GET", url, nil); err != nil {
+// Get() retrieves a block, given a locator. Returns a reader, the
+// expected data length, the URL the block is being fetched from, and
+// an error.
+//
+// If the block checksum does not match, the final Read() on the
+// reader returned by this method will return a BadChecksum error
+// instead of EOF.
+func (kc *KeepClient) Get(locator string) (io.ReadCloser, int64, string, error) {
+       var errs []string
+       for _, host := range kc.getSortedRoots(locator) {
+               url := host + "/" + locator
+               req, err := http.NewRequest("GET", url, nil)
+               if err != nil {
                        continue
                }
-
-               req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", this.Arvados.ApiToken))
-
-               log.Printf("[%v] Begin download %s", requestId, url)
-
-               var resp *http.Response
-               if resp, err = this.Client.Do(req); err != nil || resp.StatusCode != http.StatusOK {
-                       statusCode := -1
-                       var respbody []byte
+               req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", kc.Arvados.ApiToken))
+               resp, err := kc.Client.Do(req)
+               if err != nil || resp.StatusCode != http.StatusOK {
                        if resp != nil {
-                               statusCode = resp.StatusCode
+                               var respbody []byte
                                if resp.Body != nil {
                                        respbody, _ = ioutil.ReadAll(&io.LimitedReader{resp.Body, 4096})
                                }
+                               errs = append(errs, fmt.Sprintf("%s: %d %s",
+                                       url, resp.StatusCode, strings.TrimSpace(string(respbody))))
+                       } else {
+                               errs = append(errs, fmt.Sprintf("%s: %v", url, err))
                        }
-                       response := strings.TrimSpace(string(respbody))
-                       log.Printf("[%v] Download %v status code: %v error: \"%v\" response: \"%v\"",
-                               requestId, url, statusCode, err, response)
                        continue
                }
-
-               if resp.StatusCode == http.StatusOK {
-                       log.Printf("[%v] Download %v status code: %v", requestId, url, resp.StatusCode)
-                       return HashCheckingReader{resp.Body, md5.New(), hash}, resp.ContentLength, url, nil
-               }
+               return HashCheckingReader{
+                       Reader: resp.Body,
+                       Hash:   md5.New(),
+                       Check:  locator[0:32],
+               }, resp.ContentLength, url, nil
        }
-
+       log.Printf("DEBUG: GET %s failed: %v", locator, errs)
        return nil, 0, "", BlockNotFound
 }
 
-// Determine if a block with the given hash is available and readable, but does
-// not return the block contents.
-func (this KeepClient) Ask(hash string) (contentLength int64, url string, err error) {
-       return this.AuthorizedAsk(hash, "", "")
-}
-
-// Determine if a block with the given hash is available and readable with the
-// given signature and timestamp, but does not return the block contents.
-func (this KeepClient) AuthorizedAsk(hash string, signature string,
-       timestamp string) (contentLength int64, url string, err error) {
-       // Calculate the ordering for asking servers
-       sv := NewRootSorter(this.ServiceRoots(), hash).GetSortedRoots()
-
-       for _, host := range sv {
-               var req *http.Request
-               var err error
-               if signature != "" {
-                       url = fmt.Sprintf("%s/%s+A%s@%s", host, hash,
-                               signature, timestamp)
-               } else {
-                       url = fmt.Sprintf("%s/%s", host, hash)
-               }
-
-               if req, err = http.NewRequest("HEAD", url, nil); err != nil {
-                       continue
-               }
-
-               req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", this.Arvados.ApiToken))
-
-               var resp *http.Response
-               if resp, err = this.Client.Do(req); err != nil {
+// Ask() verifies that a block with the given hash is available and
+// readable, according to at least one Keep service. Unlike Get, it
+// does not retrieve the data or verify that the data content matches
+// the hash specified by the locator.
+//
+// Returns the data size (content length) reported by the Keep service
+// and the URI of the service that reported it.
+func (kc *KeepClient) Ask(locator string) (int64, string, error) {
+       for _, host := range kc.getSortedRoots(locator) {
+               url := host + "/" + locator
+               req, err := http.NewRequest("HEAD", url, nil)
+               if err != nil {
                        continue
                }
-
-               if resp.StatusCode == http.StatusOK {
+               req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", kc.Arvados.ApiToken))
+               if resp, err := kc.Client.Do(req); err == nil && resp.StatusCode == http.StatusOK {
                        return resp.ContentLength, url, nil
                }
        }
-
        return 0, "", BlockNotFound
+}
 
+// LocalRoots() returns the map of local (i.e., disk and proxy) Keep
+// services: uuid -> baseURI.
+func (kc *KeepClient) LocalRoots() map[string]string {
+       kc.lock.RLock()
+       defer kc.lock.RUnlock()
+       return *kc.localRoots
 }
 
-// Atomically read the service_roots field.
-func (this *KeepClient) ServiceRoots() map[string]string {
-       r := (*map[string]string)(atomic.LoadPointer((*unsafe.Pointer)(unsafe.Pointer(&this.service_roots))))
-       return *r
+// GatewayRoots() returns the map of Keep remote gateway services:
+// uuid -> baseURI.
+func (kc *KeepClient) GatewayRoots() map[string]string {
+       kc.lock.RLock()
+       defer kc.lock.RUnlock()
+       return *kc.gatewayRoots
 }
 
-// Atomically update the service_roots field.  Enables you to update
-// service_roots without disrupting any GET or PUT operations that might
-// already be in progress.
-func (this *KeepClient) SetServiceRoots(new_roots map[string]string) {
-       roots := make(map[string]string)
-       for uuid, root := range new_roots {
-               roots[uuid] = root
-       }
-       atomic.StorePointer((*unsafe.Pointer)(unsafe.Pointer(&this.service_roots)),
-               unsafe.Pointer(&roots))
+// WritableLocalRoots() returns the map of writable local Keep services:
+// uuid -> baseURI.
+func (kc *KeepClient) WritableLocalRoots() map[string]string {
+       kc.lock.RLock()
+       defer kc.lock.RUnlock()
+       return *kc.writableLocalRoots
 }
 
-type Locator struct {
-       Hash      string
-       Size      int
-       Signature string
-       Timestamp string
+// SetServiceRoots updates the localRoots and gatewayRoots maps,
+// without risk of disrupting operations that are already in progress.
+//
+// The KeepClient makes its own copy of the supplied maps, so the
+// caller can reuse/modify them after SetServiceRoots returns, but
+// they should not be modified by any other goroutine while
+// SetServiceRoots is running.
+func (kc *KeepClient) SetServiceRoots(newLocals, newWritableLocals map[string]string, newGateways map[string]string) {
+       locals := make(map[string]string)
+       for uuid, root := range newLocals {
+               locals[uuid] = root
+       }
+
+       writables := make(map[string]string)
+       for uuid, root := range newWritableLocals {
+               writables[uuid] = root
+       }
+
+       gateways := make(map[string]string)
+       for uuid, root := range newGateways {
+               gateways[uuid] = root
+       }
+
+       kc.lock.Lock()
+       defer kc.lock.Unlock()
+       kc.localRoots = &locals
+       kc.writableLocalRoots = &writables
+       kc.gatewayRoots = &gateways
 }
 
-func MakeLocator2(hash string, hints string) (locator Locator) {
-       locator.Hash = hash
-       if hints != "" {
-               signature_pat, _ := regexp.Compile("^A([[:xdigit:]]+)@([[:xdigit:]]{8})$")
-               for _, hint := range strings.Split(hints, "+") {
-                       if hint != "" {
-                               if match, _ := regexp.MatchString("^[[:digit:]]+$", hint); match {
-                                       fmt.Sscanf(hint, "%d", &locator.Size)
-                               } else if m := signature_pat.FindStringSubmatch(hint); m != nil {
-                                       locator.Signature = m[1]
-                                       locator.Timestamp = m[2]
-                               } else if match, _ := regexp.MatchString("^[:upper:]", hint); match {
-                                       // Any unknown hint that starts with an uppercase letter is
-                                       // presumed to be valid and ignored, to permit forward compatibility.
-                               } else {
-                                       // Unknown format; not a valid locator.
-                                       return Locator{"", 0, "", ""}
-                               }
+// getSortedRoots returns a list of base URIs of Keep services, in the
+// order they should be attempted when retrieving content for the
+// given locator.
+func (kc *KeepClient) getSortedRoots(locator string) []string {
+       var found []string
+       for _, hint := range strings.Split(locator, "+") {
+               if len(hint) < 7 || hint[0:2] != "K@" {
+                       // Not a service hint.
+                       continue
+               }
+               if len(hint) == 7 {
+                       // +K@abcde means fetch from proxy at
+                       // keep.abcde.arvadosapi.com
+                       found = append(found, "https://keep."+hint[2:]+".arvadosapi.com")
+               } else if len(hint) == 29 {
+                       // +K@abcde-abcde-abcdeabcdeabcde means fetch
+                       // from gateway with given uuid
+                       if gwURI, ok := kc.GatewayRoots()[hint[2:]]; ok {
+                               found = append(found, gwURI)
                        }
+                       // else this hint is no use to us; carry on.
                }
        }
-       return locator
+       // After trying all usable service hints, fall back to local roots.
+       found = append(found, NewRootSorter(kc.LocalRoots(), locator[0:32]).GetSortedRoots()...)
+       return found
+}
+
+type Locator struct {
+       Hash  string
+       Size  int      // -1 if data size is not known
+       Hints []string // Including the size hint, if any
 }
 
-func MakeLocator(path string) Locator {
-       pathpattern, err := regexp.Compile("^([0-9a-f]{32})([+].*)?$")
-       if err != nil {
-               log.Print("Don't like regexp", err)
+func (loc *Locator) String() string {
+       s := loc.Hash
+       if len(loc.Hints) > 0 {
+               s = s + "+" + strings.Join(loc.Hints, "+")
        }
+       return s
+}
 
-       sm := pathpattern.FindStringSubmatch(path)
+var locatorMatcher = regexp.MustCompile("^([0-9a-f]{32})([+](.*))?$")
+
+func MakeLocator(path string) (*Locator, error) {
+       sm := locatorMatcher.FindStringSubmatch(path)
        if sm == nil {
-               log.Print("Failed match ", path)
-               return Locator{"", 0, "", ""}
+               return nil, InvalidLocatorError
        }
-
-       return MakeLocator2(sm[1], sm[2])
+       loc := Locator{Hash: sm[1], Size: -1}
+       if sm[2] != "" {
+               loc.Hints = strings.Split(sm[3], "+")
+       } else {
+               loc.Hints = []string{}
+       }
+       if len(loc.Hints) > 0 {
+               if size, err := strconv.Atoi(loc.Hints[0]); err == nil {
+                       loc.Size = size
+               }
+       }
+       return &loc, nil
 }
diff --git a/sdk/go/keepclient/keepclient_test.go b/sdk/go/keepclient/keepclient_test.go
index cbd27d72e7c7e9310de1ed027e47912b7a187baa..c1f6a3e6f9a2614fc362985be67c86aeff355624 100644 (file)
@@ -63,8 +63,8 @@ func (s *ServerRequiredSuite) TestMakeKeepClient(c *C) {
        kc, err := MakeKeepClient(&arv)
 
        c.Assert(err, Equals, nil)
-       c.Check(len(kc.ServiceRoots()), Equals, 2)
-       for _, root := range kc.ServiceRoots() {
+       c.Check(len(kc.LocalRoots()), Equals, 2)
+       for _, root := range kc.LocalRoots() {
                c.Check(root, Matches, "http://localhost:\\d+")
        }
 }
@@ -77,14 +77,14 @@ type StubPutHandler struct {
        handled        chan string
 }
 
-func (this StubPutHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
-       this.c.Check(req.URL.Path, Equals, "/"+this.expectPath)
-       this.c.Check(req.Header.Get("Authorization"), Equals, fmt.Sprintf("OAuth2 %s", this.expectApiToken))
+func (sph StubPutHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+       sph.c.Check(req.URL.Path, Equals, "/"+sph.expectPath)
+       sph.c.Check(req.Header.Get("Authorization"), Equals, fmt.Sprintf("OAuth2 %s", sph.expectApiToken))
        body, err := ioutil.ReadAll(req.Body)
-       this.c.Check(err, Equals, nil)
-       this.c.Check(body, DeepEquals, []byte(this.expectBody))
+       sph.c.Check(err, Equals, nil)
+       sph.c.Check(body, DeepEquals, []byte(sph.expectBody))
        resp.WriteHeader(200)
-       this.handled <- fmt.Sprintf("http://%s", req.Host)
+       sph.handled <- fmt.Sprintf("http://%s", req.Host)
 }
 
 func RunFakeKeepServer(st http.Handler) (ks KeepServer) {
@@ -98,7 +98,7 @@ func RunFakeKeepServer(st http.Handler) (ks KeepServer) {
        return
 }
 
-func UploadToStubHelper(c *C, st http.Handler, f func(KeepClient, string,
+func UploadToStubHelper(c *C, st http.Handler, f func(*KeepClient, string,
        io.ReadCloser, io.WriteCloser, chan uploadStatus)) {
 
        ks := RunFakeKeepServer(st)
@@ -126,7 +126,7 @@ func (s *StandaloneSuite) TestUploadToStubKeepServer(c *C) {
                make(chan string)}
 
        UploadToStubHelper(c, st,
-               func(kc KeepClient, url string, reader io.ReadCloser,
+               func(kc *KeepClient, url string, reader io.ReadCloser,
                        writer io.WriteCloser, upload_status chan uploadStatus) {
 
                        go kc.uploadToKeepServer(url, st.expectPath, reader, upload_status, int64(len("foo")), "TestUploadToStubKeepServer")
@@ -153,7 +153,7 @@ func (s *StandaloneSuite) TestUploadToStubKeepServerBufferReader(c *C) {
                make(chan string)}
 
        UploadToStubHelper(c, st,
-               func(kc KeepClient, url string, reader io.ReadCloser,
+               func(kc *KeepClient, url string, reader io.ReadCloser,
                        writer io.WriteCloser, upload_status chan uploadStatus) {
 
                        tr := streamer.AsyncStreamFromReader(512, reader)
@@ -179,9 +179,9 @@ type FailHandler struct {
        handled chan string
 }
 
-func (this FailHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+func (fh FailHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
        resp.WriteHeader(500)
-       this.handled <- fmt.Sprintf("http://%s", req.Host)
+       fh.handled <- fmt.Sprintf("http://%s", req.Host)
 }
 
 func (s *StandaloneSuite) TestFailedUploadToStubKeepServer(c *C) {
@@ -193,7 +193,7 @@ func (s *StandaloneSuite) TestFailedUploadToStubKeepServer(c *C) {
        hash := "acbd18db4cc2f85cedef654fccc4a4d8"
 
        UploadToStubHelper(c, st,
-               func(kc KeepClient, url string, reader io.ReadCloser,
+               func(kc *KeepClient, url string, reader io.ReadCloser,
                        writer io.WriteCloser, upload_status chan uploadStatus) {
 
                        go kc.uploadToKeepServer(url, hash, reader, upload_status, 3, "TestFailedUploadToStubKeepServer")
@@ -242,21 +242,23 @@ func (s *StandaloneSuite) TestPutB(c *C) {
 
        kc.Want_replicas = 2
        arv.ApiToken = "abc123"
-       service_roots := make(map[string]string)
+       localRoots := make(map[string]string)
+       writableLocalRoots := make(map[string]string)
 
        ks := RunSomeFakeKeepServers(st, 5)
 
        for i, k := range ks {
-               service_roots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+               localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+               writableLocalRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
                defer k.listener.Close()
        }
 
-       kc.SetServiceRoots(service_roots)
+       kc.SetServiceRoots(localRoots, writableLocalRoots, nil)
 
        kc.PutB([]byte("foo"))
 
        shuff := NewRootSorter(
-               kc.ServiceRoots(), Md5String("foo")).GetSortedRoots()
+               kc.LocalRoots(), Md5String("foo")).GetSortedRoots()
 
        s1 := <-st.handled
        s2 := <-st.handled
@@ -285,16 +287,18 @@ func (s *StandaloneSuite) TestPutHR(c *C) {
 
        kc.Want_replicas = 2
        arv.ApiToken = "abc123"
-       service_roots := make(map[string]string)
+       localRoots := make(map[string]string)
+       writableLocalRoots := make(map[string]string)
 
        ks := RunSomeFakeKeepServers(st, 5)
 
        for i, k := range ks {
-               service_roots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+               localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+               writableLocalRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
                defer k.listener.Close()
        }
 
-       kc.SetServiceRoots(service_roots)
+       kc.SetServiceRoots(localRoots, writableLocalRoots, nil)
 
        reader, writer := io.Pipe()
 
@@ -305,7 +309,7 @@ func (s *StandaloneSuite) TestPutHR(c *C) {
 
        kc.PutHR(hash, reader, 3)
 
-       shuff := NewRootSorter(kc.ServiceRoots(), hash).GetSortedRoots()
+       shuff := NewRootSorter(kc.LocalRoots(), hash).GetSortedRoots()
        log.Print(shuff)
 
        s1 := <-st.handled
@@ -339,24 +343,27 @@ func (s *StandaloneSuite) TestPutWithFail(c *C) {
 
        kc.Want_replicas = 2
        arv.ApiToken = "abc123"
-       service_roots := make(map[string]string)
+       localRoots := make(map[string]string)
+       writableLocalRoots := make(map[string]string)
 
        ks1 := RunSomeFakeKeepServers(st, 4)
        ks2 := RunSomeFakeKeepServers(fh, 1)
 
        for i, k := range ks1 {
-               service_roots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+               localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+               writableLocalRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
                defer k.listener.Close()
        }
        for i, k := range ks2 {
-               service_roots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i+len(ks1))] = k.url
+               localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i+len(ks1))] = k.url
+               writableLocalRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i+len(ks1))] = k.url
                defer k.listener.Close()
        }
 
-       kc.SetServiceRoots(service_roots)
+       kc.SetServiceRoots(localRoots, writableLocalRoots, nil)
 
        shuff := NewRootSorter(
-               kc.ServiceRoots(), Md5String("foo")).GetSortedRoots()
+               kc.LocalRoots(), Md5String("foo")).GetSortedRoots()
 
        phash, replicas, err := kc.PutB([]byte("foo"))
 
@@ -395,21 +402,24 @@ func (s *StandaloneSuite) TestPutWithTooManyFail(c *C) {
 
        kc.Want_replicas = 2
        arv.ApiToken = "abc123"
-       service_roots := make(map[string]string)
+       localRoots := make(map[string]string)
+       writableLocalRoots := make(map[string]string)
 
        ks1 := RunSomeFakeKeepServers(st, 1)
        ks2 := RunSomeFakeKeepServers(fh, 4)
 
        for i, k := range ks1 {
-               service_roots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+               localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+               writableLocalRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
                defer k.listener.Close()
        }
        for i, k := range ks2 {
-               service_roots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i+len(ks1))] = k.url
+               localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i+len(ks1))] = k.url
+               writableLocalRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i+len(ks1))] = k.url
                defer k.listener.Close()
        }
 
-       kc.SetServiceRoots(service_roots)
+       kc.SetServiceRoots(localRoots, writableLocalRoots, nil)
 
        _, replicas, err := kc.PutB([]byte("foo"))
 
@@ -424,14 +434,16 @@ type StubGetHandler struct {
        c              *C
        expectPath     string
        expectApiToken string
-       returnBody     []byte
+       httpStatus     int
+       body           []byte
 }
 
-func (this StubGetHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
-       this.c.Check(req.URL.Path, Equals, "/"+this.expectPath)
-       this.c.Check(req.Header.Get("Authorization"), Equals, fmt.Sprintf("OAuth2 %s", this.expectApiToken))
-       resp.Header().Set("Content-Length", fmt.Sprintf("%d", len(this.returnBody)))
-       resp.Write(this.returnBody)
+func (sgh StubGetHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+       sgh.c.Check(req.URL.Path, Equals, "/"+sgh.expectPath)
+       sgh.c.Check(req.Header.Get("Authorization"), Equals, fmt.Sprintf("OAuth2 %s", sgh.expectApiToken))
+       resp.WriteHeader(sgh.httpStatus)
+       resp.Header().Set("Content-Length", fmt.Sprintf("%d", len(sgh.body)))
+       resp.Write(sgh.body)
 }
 
 func (s *StandaloneSuite) TestGet(c *C) {
@@ -443,6 +455,7 @@ func (s *StandaloneSuite) TestGet(c *C) {
                c,
                hash,
                "abc123",
+               http.StatusOK,
                []byte("foo")}
 
        ks := RunFakeKeepServer(st)
@@ -451,7 +464,7 @@ func (s *StandaloneSuite) TestGet(c *C) {
        arv, err := arvadosclient.MakeArvadosClient()
        kc, _ := MakeKeepClient(&arv)
        arv.ApiToken = "abc123"
-       kc.SetServiceRoots(map[string]string{"x": ks.url})
+       kc.SetServiceRoots(map[string]string{"x": ks.url}, map[string]string{ks.url: ""}, nil)
 
        r, n, url2, err := kc.Get(hash)
        defer r.Close()
@@ -477,7 +490,7 @@ func (s *StandaloneSuite) TestGetFail(c *C) {
        arv, err := arvadosclient.MakeArvadosClient()
        kc, _ := MakeKeepClient(&arv)
        arv.ApiToken = "abc123"
-       kc.SetServiceRoots(map[string]string{"x": ks.url})
+       kc.SetServiceRoots(map[string]string{"x": ks.url}, map[string]string{ks.url: ""}, nil)
 
        r, n, url2, err := kc.Get(hash)
        c.Check(err, Equals, BlockNotFound)
@@ -486,6 +499,140 @@ func (s *StandaloneSuite) TestGetFail(c *C) {
        c.Check(r, Equals, nil)
 }
 
+func (s *StandaloneSuite) TestGetWithServiceHint(c *C) {
+       uuid := "zzzzz-bi6l4-123451234512345"
+       hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+
+       // This one shouldn't be used:
+       ks0 := RunFakeKeepServer(StubGetHandler{
+               c,
+               "error if used",
+               "abc123",
+               http.StatusOK,
+               []byte("foo")})
+       defer ks0.listener.Close()
+       // This one should be used:
+       ks := RunFakeKeepServer(StubGetHandler{
+               c,
+               hash + "+K@" + uuid,
+               "abc123",
+               http.StatusOK,
+               []byte("foo")})
+       defer ks.listener.Close()
+
+       arv, err := arvadosclient.MakeArvadosClient()
+       kc, _ := MakeKeepClient(&arv)
+       arv.ApiToken = "abc123"
+       kc.SetServiceRoots(
+               map[string]string{"x": ks0.url},
+               map[string]string{"x": ks0.url},
+               map[string]string{uuid: ks.url})
+
+       r, n, uri, err := kc.Get(hash + "+K@" + uuid)
+       defer r.Close()
+       c.Check(err, Equals, nil)
+       c.Check(n, Equals, int64(3))
+       c.Check(uri, Equals, fmt.Sprintf("%s/%s", ks.url, hash+"+K@"+uuid))
+
+       content, err := ioutil.ReadAll(r)
+       c.Check(err, Equals, nil)
+       c.Check(content, DeepEquals, []byte("foo"))
+}
+
+// Use a service hint to fetch from a local disk service, overriding
+// rendezvous probe order.
+func (s *StandaloneSuite) TestGetWithLocalServiceHint(c *C) {
+       uuid := "zzzzz-bi6l4-zzzzzzzzzzzzzzz"
+       hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+
+       // This one shouldn't be used, although it appears first in
+       // rendezvous probe order:
+       ks0 := RunFakeKeepServer(StubGetHandler{
+               c,
+               "error if used",
+               "abc123",
+               http.StatusOK,
+               []byte("foo")})
+       defer ks0.listener.Close()
+       // This one should be used:
+       ks := RunFakeKeepServer(StubGetHandler{
+               c,
+               hash + "+K@" + uuid,
+               "abc123",
+               http.StatusOK,
+               []byte("foo")})
+       defer ks.listener.Close()
+
+       arv, err := arvadosclient.MakeArvadosClient()
+       kc, _ := MakeKeepClient(&arv)
+       arv.ApiToken = "abc123"
+       kc.SetServiceRoots(
+               map[string]string{
+                       "zzzzz-bi6l4-yyyyyyyyyyyyyyy": ks0.url,
+                       "zzzzz-bi6l4-xxxxxxxxxxxxxxx": ks0.url,
+                       "zzzzz-bi6l4-wwwwwwwwwwwwwww": ks0.url,
+                       uuid: ks.url},
+               map[string]string{
+                       "zzzzz-bi6l4-yyyyyyyyyyyyyyy": ks0.url,
+                       "zzzzz-bi6l4-xxxxxxxxxxxxxxx": ks0.url,
+                       "zzzzz-bi6l4-wwwwwwwwwwwwwww": ks0.url,
+                       uuid: ks.url},
+               map[string]string{
+                       "zzzzz-bi6l4-yyyyyyyyyyyyyyy": ks0.url,
+                       "zzzzz-bi6l4-xxxxxxxxxxxxxxx": ks0.url,
+                       "zzzzz-bi6l4-wwwwwwwwwwwwwww": ks0.url,
+                       uuid: ks.url},
+       )
+
+       r, n, uri, err := kc.Get(hash + "+K@" + uuid)
+       defer r.Close()
+       c.Check(err, Equals, nil)
+       c.Check(n, Equals, int64(3))
+       c.Check(uri, Equals, fmt.Sprintf("%s/%s", ks.url, hash+"+K@"+uuid))
+
+       content, err := ioutil.ReadAll(r)
+       c.Check(err, Equals, nil)
+       c.Check(content, DeepEquals, []byte("foo"))
+}
+
+func (s *StandaloneSuite) TestGetWithServiceHintFailoverToLocals(c *C) {
+       uuid := "zzzzz-bi6l4-123451234512345"
+       hash := fmt.Sprintf("%x", md5.Sum([]byte("foo")))
+
+       ksLocal := RunFakeKeepServer(StubGetHandler{
+               c,
+               hash + "+K@" + uuid,
+               "abc123",
+               http.StatusOK,
+               []byte("foo")})
+       defer ksLocal.listener.Close()
+       ksGateway := RunFakeKeepServer(StubGetHandler{
+               c,
+               hash + "+K@" + uuid,
+               "abc123",
+               http.StatusInternalServerError,
+               []byte("Error")})
+       defer ksGateway.listener.Close()
+
+       arv, err := arvadosclient.MakeArvadosClient()
+       kc, _ := MakeKeepClient(&arv)
+       arv.ApiToken = "abc123"
+       kc.SetServiceRoots(
+               map[string]string{"zzzzz-bi6l4-keepdisk0000000": ksLocal.url},
+               map[string]string{"zzzzz-bi6l4-keepdisk0000000": ksLocal.url},
+               map[string]string{uuid: ksGateway.url})
+
+       r, n, uri, err := kc.Get(hash + "+K@" + uuid)
+       c.Assert(err, Equals, nil)
+       defer r.Close()
+       c.Check(n, Equals, int64(3))
+       c.Check(uri, Equals, fmt.Sprintf("%s/%s", ksLocal.url, hash+"+K@"+uuid))
+
+       content, err := ioutil.ReadAll(r)
+       c.Check(err, Equals, nil)
+       c.Check(content, DeepEquals, []byte("foo"))
+}
+
 type BarHandler struct {
        handled chan string
 }
@@ -507,7 +654,7 @@ func (s *StandaloneSuite) TestChecksum(c *C) {
        arv, err := arvadosclient.MakeArvadosClient()
        kc, _ := MakeKeepClient(&arv)
        arv.ApiToken = "abc123"
-       kc.SetServiceRoots(map[string]string{"x": ks.url})
+       kc.SetServiceRoots(map[string]string{"x": ks.url}, map[string]string{ks.url: ""}, nil)
 
        r, n, _, err := kc.Get(barhash)
        _, err = ioutil.ReadAll(r)
@@ -535,26 +682,30 @@ func (s *StandaloneSuite) TestGetWithFailures(c *C) {
                c,
                hash,
                "abc123",
+               http.StatusOK,
                content}
 
        arv, err := arvadosclient.MakeArvadosClient()
        kc, _ := MakeKeepClient(&arv)
        arv.ApiToken = "abc123"
-       service_roots := make(map[string]string)
+       localRoots := make(map[string]string)
+       writableLocalRoots := make(map[string]string)
 
        ks1 := RunSomeFakeKeepServers(st, 1)
        ks2 := RunSomeFakeKeepServers(fh, 4)
 
        for i, k := range ks1 {
-               service_roots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+               localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+               writableLocalRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
                defer k.listener.Close()
        }
        for i, k := range ks2 {
-               service_roots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i+len(ks1))] = k.url
+               localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i+len(ks1))] = k.url
+               writableLocalRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i+len(ks1))] = k.url
                defer k.listener.Close()
        }
 
-       kc.SetServiceRoots(service_roots)
+       kc.SetServiceRoots(localRoots, writableLocalRoots, nil)
 
        // This test works only if one of the failing services is
        // attempted before the succeeding service. Otherwise,
@@ -562,7 +713,7 @@ func (s *StandaloneSuite) TestGetWithFailures(c *C) {
        // the choice of block content "waz" and the UUIDs of the fake
        // servers, so we just tried different strings until we found
        // an example that passes this Assert.)
-       c.Assert(NewRootSorter(service_roots, hash).GetSortedRoots()[0], Not(Equals), ks1[0].url)
+       c.Assert(NewRootSorter(localRoots, hash).GetSortedRoots()[0], Not(Equals), ks1[0].url)
 
        r, n, url2, err := kc.Get(hash)
 
@@ -634,16 +785,18 @@ func (s *StandaloneSuite) TestPutProxy(c *C) {
        kc.Want_replicas = 2
        kc.Using_proxy = true
        arv.ApiToken = "abc123"
-       service_roots := make(map[string]string)
+       localRoots := make(map[string]string)
+       writableLocalRoots := make(map[string]string)
 
        ks1 := RunSomeFakeKeepServers(st, 1)
 
        for i, k := range ks1 {
-               service_roots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+               localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+               writableLocalRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
                defer k.listener.Close()
        }
 
-       kc.SetServiceRoots(service_roots)
+       kc.SetServiceRoots(localRoots, writableLocalRoots, nil)
 
        _, replicas, err := kc.PutB([]byte("foo"))
        <-st.handled
@@ -665,15 +818,17 @@ func (s *StandaloneSuite) TestPutProxyInsufficientReplicas(c *C) {
        kc.Want_replicas = 3
        kc.Using_proxy = true
        arv.ApiToken = "abc123"
-       service_roots := make(map[string]string)
+       localRoots := make(map[string]string)
+       writableLocalRoots := make(map[string]string)
 
        ks1 := RunSomeFakeKeepServers(st, 1)
 
        for i, k := range ks1 {
-               service_roots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+               localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+               writableLocalRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
                defer k.listener.Close()
        }
-       kc.SetServiceRoots(service_roots)
+       kc.SetServiceRoots(localRoots, writableLocalRoots, nil)
 
        _, replicas, err := kc.PutB([]byte("foo"))
        <-st.handled
@@ -685,10 +840,111 @@ func (s *StandaloneSuite) TestPutProxyInsufficientReplicas(c *C) {
 }
 
 func (s *StandaloneSuite) TestMakeLocator(c *C) {
-       l := MakeLocator("91f372a266fe2bf2823cb8ec7fda31ce+3+Aabcde@12345678")
+       l, err := MakeLocator("91f372a266fe2bf2823cb8ec7fda31ce+3+Aabcde@12345678")
+       c.Check(err, Equals, nil)
+       c.Check(l.Hash, Equals, "91f372a266fe2bf2823cb8ec7fda31ce")
+       c.Check(l.Size, Equals, 3)
+       c.Check(l.Hints, DeepEquals, []string{"3", "Aabcde@12345678"})
+}
 
+func (s *StandaloneSuite) TestMakeLocatorNoHints(c *C) {
+       l, err := MakeLocator("91f372a266fe2bf2823cb8ec7fda31ce")
+       c.Check(err, Equals, nil)
+       c.Check(l.Hash, Equals, "91f372a266fe2bf2823cb8ec7fda31ce")
+       c.Check(l.Size, Equals, -1)
+       c.Check(l.Hints, DeepEquals, []string{})
+}
+
+func (s *StandaloneSuite) TestMakeLocatorNoSizeHint(c *C) {
+       l, err := MakeLocator("91f372a266fe2bf2823cb8ec7fda31ce+Aabcde@12345678")
+       c.Check(err, Equals, nil)
+       c.Check(l.Hash, Equals, "91f372a266fe2bf2823cb8ec7fda31ce")
+       c.Check(l.Size, Equals, -1)
+       c.Check(l.Hints, DeepEquals, []string{"Aabcde@12345678"})
+}
+
+func (s *StandaloneSuite) TestMakeLocatorPreservesUnrecognizedHints(c *C) {
+       str := "91f372a266fe2bf2823cb8ec7fda31ce+3+Unknown+Kzzzzz+Afoobar"
+       l, err := MakeLocator(str)
+       c.Check(err, Equals, nil)
        c.Check(l.Hash, Equals, "91f372a266fe2bf2823cb8ec7fda31ce")
        c.Check(l.Size, Equals, 3)
-       c.Check(l.Signature, Equals, "abcde")
-       c.Check(l.Timestamp, Equals, "12345678")
+       c.Check(l.Hints, DeepEquals, []string{"3", "Unknown", "Kzzzzz", "Afoobar"})
+       c.Check(l.String(), Equals, str)
+}
+
+func (s *StandaloneSuite) TestMakeLocatorInvalidInput(c *C) {
+       _, err := MakeLocator("91f372a266fe2bf2823cb8ec7fda31c")
+       c.Check(err, Equals, InvalidLocatorError)
+}
+
+func (s *StandaloneSuite) TestPutBWant2ReplicasWithOnlyOneWritableLocalRoot(c *C) {
+       hash := Md5String("foo")
+
+       st := StubPutHandler{
+               c,
+               hash,
+               "abc123",
+               "foo",
+               make(chan string, 5)}
+
+       arv, _ := arvadosclient.MakeArvadosClient()
+       kc, _ := MakeKeepClient(&arv)
+
+       kc.Want_replicas = 2
+       arv.ApiToken = "abc123"
+       localRoots := make(map[string]string)
+       writableLocalRoots := make(map[string]string)
+
+       ks := RunSomeFakeKeepServers(st, 5)
+
+       for i, k := range ks {
+               localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+               if i == 0 {
+                       writableLocalRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+               }
+               defer k.listener.Close()
+       }
+
+       kc.SetServiceRoots(localRoots, writableLocalRoots, nil)
+
+       _, replicas, err := kc.PutB([]byte("foo"))
+
+       c.Check(err, Equals, InsufficientReplicasError)
+       c.Check(replicas, Equals, 1)
+
+       c.Check(<-st.handled, Equals, localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", 0)])
+}
+
+func (s *StandaloneSuite) TestPutBWithNoWritableLocalRoots(c *C) {
+       hash := Md5String("foo")
+
+       st := StubPutHandler{
+               c,
+               hash,
+               "abc123",
+               "foo",
+               make(chan string, 5)}
+
+       arv, _ := arvadosclient.MakeArvadosClient()
+       kc, _ := MakeKeepClient(&arv)
+
+       kc.Want_replicas = 2
+       arv.ApiToken = "abc123"
+       localRoots := make(map[string]string)
+       writableLocalRoots := make(map[string]string)
+
+       ks := RunSomeFakeKeepServers(st, 5)
+
+       for i, k := range ks {
+               localRoots[fmt.Sprintf("zzzzz-bi6l4-fakefakefake%03d", i)] = k.url
+               defer k.listener.Close()
+       }
+
+       kc.SetServiceRoots(localRoots, writableLocalRoots, nil)
+
+       _, replicas, err := kc.PutB([]byte("foo"))
+
+       c.Check(err, Equals, InsufficientReplicasError)
+       c.Check(replicas, Equals, 0)
 }
index c2780bc716171e7e1b96efd76ec2ed1fa2af26d5..a8afea096109531ad0e27bf4c31e5b26010c8538 100644 (file)
@@ -5,12 +5,12 @@ import (
 )
 
 type RootSorter struct {
-       root         []string
-       weight       []string
-       order        []int
+       root   []string
+       weight []string
+       order  []int
 }
 
-func NewRootSorter(serviceRoots map[string]string, hash string) (*RootSorter) {
+func NewRootSorter(serviceRoots map[string]string, hash string) *RootSorter {
        rs := new(RootSorter)
        rs.root = make([]string, len(serviceRoots))
        rs.weight = make([]string, len(serviceRoots))
@@ -26,7 +26,7 @@ func NewRootSorter(serviceRoots map[string]string, hash string) (*RootSorter) {
        return rs
 }
 
-func (rs RootSorter) getWeight(hash string, uuid string) (string) {
+func (rs RootSorter) getWeight(hash string, uuid string) string {
        if len(uuid) == 27 {
                return Md5String(hash + uuid[12:])
        } else {
@@ -35,7 +35,7 @@ func (rs RootSorter) getWeight(hash string, uuid string) (string) {
        }
 }
 
-func (rs RootSorter) GetSortedRoots() ([]string) {
+func (rs RootSorter) GetSortedRoots() []string {
        sorted := make([]string, len(rs.order))
        for i := range rs.order {
                sorted[i] = rs.root[rs.order[i]]
index 455715d427ef8ac06f478adef20818058331a45a..23e8cc93f292a9a2196b09f4ac19558bb97e0dd4 100644 (file)
@@ -8,19 +8,20 @@ import (
 )
 
 type RootSorterSuite struct{}
+
 var _ = Suite(&RootSorterSuite{})
 
-func FakeSvcRoot(i uint64) (string) {
+func FakeSvcRoot(i uint64) string {
        return fmt.Sprintf("https://%x.svc/", i)
 }
 
-func FakeSvcUuid(i uint64) (string) {
+func FakeSvcUuid(i uint64) string {
        return fmt.Sprintf("zzzzz-bi6l4-%015x", i)
 }
 
-func FakeServiceRoots(n uint64) (map[string]string) {
+func FakeServiceRoots(n uint64) map[string]string {
        sr := map[string]string{}
-       for i := uint64(0); i < n; i ++ {
+       for i := uint64(0); i < n; i++ {
                sr[FakeSvcUuid(i)] = FakeSvcRoot(i)
        }
        return sr
index 940a110081dbaa46920cb472ea09d9e0635fd219..b467d06b21ed91ecaa06b1537ae30a6cddcb6ce2 100644 (file)
@@ -1,4 +1,3 @@
-/* Internal methods to support keepclient.go */
 package keepclient
 
 import (
@@ -21,6 +20,7 @@ type keepDisk struct {
        Port     int    `json:"service_port"`
        SSL      bool   `json:"service_ssl_flag"`
        SvcType  string `json:"service_type"`
+       ReadOnly bool   `json:"read_only"`
 }
 
 func Md5String(s string) string {
@@ -76,7 +76,7 @@ func (this *KeepClient) setClientSettingsStore() {
        }
 }
 
-func (this *KeepClient) DiscoverKeepServers() (map[string]string, error) {
+func (this *KeepClient) DiscoverKeepServers() error {
        type svcList struct {
                Items []keepDisk `json:"items"`
        }
@@ -86,31 +86,46 @@ func (this *KeepClient) DiscoverKeepServers() (map[string]string, error) {
 
        if err != nil {
                if err := this.Arvados.List("keep_disks", nil, &m); err != nil {
-                       return nil, err
+                       return err
                }
        }
 
        listed := make(map[string]bool)
-       service_roots := make(map[string]string)
-
-       for _, element := range m.Items {
-               n := ""
-
-               if element.SSL {
-                       n = "s"
+       localRoots := make(map[string]string)
+       gatewayRoots := make(map[string]string)
+       writableLocalRoots := make(map[string]string)
+
+       for _, service := range m.Items {
+               scheme := "http"
+               if service.SSL {
+                       scheme = "https"
                }
-
-               // Construct server URL
-               url := fmt.Sprintf("http%s://%s:%d", n, element.Hostname, element.Port)
+               url := fmt.Sprintf("%s://%s:%d", scheme, service.Hostname, service.Port)
 
                // Skip duplicates
-               if !listed[url] {
-                       listed[url] = true
-                       service_roots[element.Uuid] = url
+               if listed[url] {
+                       continue
                }
-               if element.SvcType == "proxy" {
+               listed[url] = true
+
+               switch service.SvcType {
+               case "disk":
+                       localRoots[service.Uuid] = url
+               case "proxy":
+                       localRoots[service.Uuid] = url
                        this.Using_proxy = true
                }
+
+       if !service.ReadOnly {
+                       writableLocalRoots[service.Uuid] = url
+               }
+
+               // Gateway services are only used when specified by
+               // UUID, so there's nothing to gain by filtering them
+               // by service type. Including all accessible services
+               // (gateway and otherwise) merely accommodates more
+               // service configurations.
+               gatewayRoots[service.Uuid] = url
        }
 
        if this.Using_proxy {
@@ -119,9 +134,8 @@ func (this *KeepClient) DiscoverKeepServers() (map[string]string, error) {
                this.setClientSettingsStore()
        }
 
-       this.SetServiceRoots(service_roots)
-
-       return service_roots, nil
+       this.SetServiceRoots(localRoots, writableLocalRoots, gatewayRoots)
+       return nil
 }
 
 type uploadStatus struct {
@@ -204,7 +218,7 @@ func (this KeepClient) putReplicas(
        requestId := fmt.Sprintf("%x", md5.Sum([]byte(locator+time.Now().String())))[0:8]
 
        // Calculate the ordering for uploading to servers
-       sv := NewRootSorter(this.ServiceRoots(), hash).GetSortedRoots()
+       sv := NewRootSorter(this.WritableLocalRoots(), hash).GetSortedRoots()
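+       // (Rendezvous hashing: RootSorter weights each root by
+       // md5(hash + last 15 chars of the service UUID); see getWeight.)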
 
        // The next server to try contacting
        next_server := 0
index ce18e90ecfc3ac3134f997caf4634be74b9a0fa5..a989afcf26cb7a6d7b7bad6e4b1589d30e06520d 100644 (file)
 //     entry map[string]interface{}) {
 //   // Modify properties and entry however you want
 //   // properties is a shortcut for entry["properties"].(map[string]interface{})
-//   // properties can take any values you want to give it,
-//   // entry will only take the fields listed at http://doc.arvados.org/api/schema/Log.html
+//   // properties can take any (valid) values you want to give it,
+//   // entry will only take the fields listed at
+//   // http://doc.arvados.org/api/schema/Log.html
+//   // Valid values for properties are anything that can be json
+//   // encoded (i.e. will not error if you call json.Marshal() on it).
 // })
 package logger
 
diff --git a/sdk/go/logger/main/testlogger.go b/sdk/go/logger/main/testlogger.go
deleted file mode 100644 (file)
index 6cd7dfb..0000000
+++ /dev/null
@@ -1,29 +0,0 @@
-// This binary tests the logger package.
-// It's not a standard unit test. Instead it writes to the actual log
-// and you have to clean up after it.
-
-package main
-
-import (
-       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
-       "git.curoverse.com/arvados.git/sdk/go/logger"
-       "log"
-)
-
-func main() {
-       arv, err := arvadosclient.MakeArvadosClient()
-       if err != nil {
-               log.Fatalf("Error setting up arvados client %v", err)
-       }
-
-       l := logger.NewLogger(logger.LoggerParams{Client: arv,
-               EventType: "experimental-logger-testing",
-               // No minimum write interval
-       })
-
-       {
-               properties, _ := l.Edit()
-               properties["Ninja"] = "Misha"
-       }
-       l.Record()
-}
diff --git a/sdk/go/logger/util.go b/sdk/go/logger/util.go
new file mode 100644 (file)
index 0000000..6425aca
--- /dev/null
@@ -0,0 +1,20 @@
+// Helper methods for interacting with Logger.
+package logger
+
+// GetOrCreateMap returns the map[string]interface{} stored at
+// parent[key] if it exists; otherwise it creates one, stores it at
+// parent[key], and returns it.
+// This is useful for logger because callers may not know whether a
+// map they need has already been created.
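+//
+// Hypothetical usage sketch:
+//
+//   properties := map[string]interface{}{}
+//   runInfo := GetOrCreateMap(properties, "run_info")
+//   runInfo["state"] = "complete" // also visible via properties["run_info"]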
+func GetOrCreateMap(
+       parent map[string]interface{},
+       key string) (child map[string]interface{}) {
+       read, exists := parent[key]
+       if exists {
+               child = read.(map[string]interface{})
+       } else {
+               child = make(map[string]interface{})
+               parent[key] = child
+       }
+       return
+}
index f6698c67d2f2436ff38b9e4c6f641e62f3b3110e..4e816cd73b30abbb7afce9f4b51d3767a897cfa8 100644 (file)
@@ -5,27 +5,15 @@
 package manifest
 
 import (
-       "fmt"
        "git.curoverse.com/arvados.git/sdk/go/blockdigest"
        "log"
-       "regexp"
-       "strconv"
        "strings"
 )
 
-var LocatorPattern = regexp.MustCompile(
-       "^[0-9a-fA-F]{32}\\+[0-9]+(\\+[A-Z][A-Za-z0-9@_-]+)*$")
-
 type Manifest struct {
        Text string
 }
 
-type BlockLocator struct {
-       Digest blockdigest.BlockDigest
-       Size   int
-       Hints  []string
-}
-
 // Represents a single line from a manifest.
 type ManifestStream struct {
        StreamName string
@@ -33,40 +21,13 @@ type ManifestStream struct {
        Files      []string
 }
 
-func ParseBlockLocator(s string) (b BlockLocator, err error) {
-       if !LocatorPattern.MatchString(s) {
-               err = fmt.Errorf("String \"%s\" does not match BlockLocator pattern "+
-                       "\"%s\".",
-                       s,
-                       LocatorPattern.String())
-       } else {
-               tokens := strings.Split(s, "+")
-               var blockSize int64
-               var blockDigest blockdigest.BlockDigest
-               // We expect both of the following to succeed since LocatorPattern
-               // restricts the strings appropriately.
-               blockDigest, err = blockdigest.FromString(tokens[0])
-               if err != nil {
-                       return
-               }
-               blockSize, err = strconv.ParseInt(tokens[1], 10, 0)
-               if err != nil {
-                       return
-               }
-               b.Digest = blockDigest
-               b.Size = int(blockSize)
-               b.Hints = tokens[2:]
-       }
-       return
-}
-
 func parseManifestStream(s string) (m ManifestStream) {
        tokens := strings.Split(s, " ")
        m.StreamName = tokens[0]
        tokens = tokens[1:]
        var i int
        for i = range tokens {
-               if !LocatorPattern.MatchString(tokens[i]) {
+               if !blockdigest.IsBlockLocator(tokens[i]) {
                        break
                }
        }
@@ -100,12 +61,12 @@ func (m *Manifest) StreamIter() <-chan ManifestStream {
 // Blocks may appear multiple times within the same manifest if they
 // are used by multiple files. In that case this Iterator will output
 // the same block multiple times.
-func (m *Manifest) BlockIterWithDuplicates() <-chan BlockLocator {
-       blockChannel := make(chan BlockLocator)
+func (m *Manifest) BlockIterWithDuplicates() <-chan blockdigest.BlockLocator {
+       blockChannel := make(chan blockdigest.BlockLocator)
        go func(streamChannel <-chan ManifestStream) {
                for m := range streamChannel {
                        for _, block := range m.Blocks {
-                               if b, err := ParseBlockLocator(block); err == nil {
+                               if b, err := blockdigest.ParseBlockLocator(block); err == nil {
                                        blockChannel <- b
                                } else {
                                        log.Printf("ERROR: Failed to parse block: %v", err)
index c1bfb14e1460757ed3991dc095e09c5fef46372e..8cfe3d907721e4f7d33048dfa16ef91e38dec12d 100644 (file)
@@ -7,14 +7,14 @@ import (
        "testing"
 )
 
-func getStackTrace() (string) {
+func getStackTrace() string {
        buf := make([]byte, 1000)
        bytes_written := runtime.Stack(buf, false)
        return "Stack Trace:\n" + string(buf[:bytes_written])
 }
 
 func expectFromChannel(t *testing.T, c <-chan string, expected string) {
-       actual, ok := <- c
+       actual, ok := <-c
        if !ok {
                t.Fatalf("Expected to receive %s but channel was closed. %s",
                        expected,
@@ -29,7 +29,7 @@ func expectFromChannel(t *testing.T, c <-chan string, expected string) {
 }
 
 func expectChannelClosed(t *testing.T, c <-chan interface{}) {
-       received, ok := <- c
+       received, ok := <-c
        if ok {
                t.Fatalf("Expected channel to be closed, but received %v instead. %s",
                        received,
@@ -63,67 +63,17 @@ func expectManifestStream(t *testing.T, actual ManifestStream, expected Manifest
        expectStringSlicesEqual(t, actual.Files, expected.Files)
 }
 
-func expectBlockLocator(t *testing.T, actual BlockLocator, expected BlockLocator) {
+func expectBlockLocator(t *testing.T, actual blockdigest.BlockLocator, expected blockdigest.BlockLocator) {
        expectEqual(t, actual.Digest, expected.Digest)
        expectEqual(t, actual.Size, expected.Size)
        expectStringSlicesEqual(t, actual.Hints, expected.Hints)
 }
 
-func expectLocatorPatternMatch(t *testing.T, s string) {
-       if !LocatorPattern.MatchString(s) {
-               t.Fatalf("Expected \"%s\" to match locator pattern but it did not.",
-                       s)
-       }
-}
-
-func expectLocatorPatternFail(t *testing.T, s string) {
-       if LocatorPattern.MatchString(s) {
-               t.Fatalf("Expected \"%s\" to fail locator pattern but it passed.",
-                       s)
-       }
-}
-
-func TestLocatorPatternBasic(t *testing.T) {
-       expectLocatorPatternMatch(t, "12345678901234567890123456789012+12345")
-       expectLocatorPatternMatch(t, "A2345678901234abcdefababdeffdfdf+12345")
-       expectLocatorPatternMatch(t, "12345678901234567890123456789012+12345+A1")
-       expectLocatorPatternMatch(t,
-               "12345678901234567890123456789012+12345+A1+B123wxyz@_-")
-       expectLocatorPatternMatch(t,
-               "12345678901234567890123456789012+12345+A1+B123wxyz@_-+C@")
-
-       expectLocatorPatternFail(t,  "12345678901234567890123456789012")
-       expectLocatorPatternFail(t,  "12345678901234567890123456789012+")
-       expectLocatorPatternFail(t,  "12345678901234567890123456789012+12345+")
-       expectLocatorPatternFail(t,  "1234567890123456789012345678901+12345")
-       expectLocatorPatternFail(t,  "123456789012345678901234567890123+12345")
-       expectLocatorPatternFail(t,  "g2345678901234abcdefababdeffdfdf+12345")
-       expectLocatorPatternFail(t,  "12345678901234567890123456789012+12345 ")
-       expectLocatorPatternFail(t,  "12345678901234567890123456789012+12345+1")
-       expectLocatorPatternFail(t,  "12345678901234567890123456789012+12345+1A")
-       expectLocatorPatternFail(t,  "12345678901234567890123456789012+12345+A")
-       expectLocatorPatternFail(t,  "12345678901234567890123456789012+12345+a1")
-       expectLocatorPatternFail(t,  "12345678901234567890123456789012+12345+A1+")
-       expectLocatorPatternFail(t,  "12345678901234567890123456789012+12345+A1+B")
-       expectLocatorPatternFail(t,  "12345678901234567890123456789012+12345+A+B2")
-}
-
 func TestParseManifestStreamSimple(t *testing.T) {
        m := parseManifestStream(". 365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf 0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt")
        expectManifestStream(t, m, ManifestStream{StreamName: ".",
                Blocks: []string{"365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"},
-               Files: []string{"0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt"}})
-}
-
-func TestParseBlockLocatorSimple(t *testing.T) {
-       b, err := ParseBlockLocator("365f83f5f808896ec834c8b595288735+2310+K@qr1hi+Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf")
-       if err != nil {
-               t.Fatalf("Unexpected error parsing block locator: %v", err)
-       }
-       expectBlockLocator(t, b, BlockLocator{Digest: blockdigest.AssertFromString("365f83f5f808896ec834c8b595288735"),
-               Size: 2310,
-               Hints: []string{"K@qr1hi",
-                       "Af0c9a66381f3b028677411926f0be1c6282fe67c@542b5ddf"}})
+               Files:  []string{"0:2310:qr1hi-8i9sb-ienvmpve1a0vpoi.log.txt"}})
 }
 
 func TestStreamIterShortManifestWithBlankStreams(t *testing.T) {
@@ -139,9 +89,9 @@ func TestStreamIterShortManifestWithBlankStreams(t *testing.T) {
                firstStream,
                ManifestStream{StreamName: ".",
                        Blocks: []string{"b746e3d2104645f2f64cd3cc69dd895d+15693477+E2866e643690156651c03d876e638e674dcd79475@5441920c"},
-                       Files: []string{"0:15893477:chr10_band0_s0_e3000000.fj"}})
+                       Files:  []string{"0:15893477:chr10_band0_s0_e3000000.fj"}})
 
-       received, ok := <- streamIter
+       received, ok := <-streamIter
        if ok {
                t.Fatalf("Expected streamIter to be closed, but received %v instead.",
                        received)
@@ -159,20 +109,20 @@ func TestBlockIterLongManifest(t *testing.T) {
        firstBlock := <-blockChannel
        expectBlockLocator(t,
                firstBlock,
-               BlockLocator{Digest: blockdigest.AssertFromString("b746e3d2104645f2f64cd3cc69dd895d"),
-                       Size: 15693477,
+               blockdigest.BlockLocator{Digest: blockdigest.AssertFromString("b746e3d2104645f2f64cd3cc69dd895d"),
+                       Size:  15693477,
                        Hints: []string{"E2866e643690156651c03d876e638e674dcd79475@5441920c"}})
        blocksRead := 1
-       var lastBlock BlockLocator
+       var lastBlock blockdigest.BlockLocator
        for lastBlock = range blockChannel {
                //log.Printf("Blocks Read: %d", blocksRead)
-               blocksRead++
+               blocksRead++
        }
        expectEqual(t, blocksRead, 853)
 
        expectBlockLocator(t,
                lastBlock,
-               BlockLocator{Digest: blockdigest.AssertFromString("f9ce82f59e5908d2d70e18df9679b469"),
-                       Size: 31367794,
+               blockdigest.BlockLocator{Digest: blockdigest.AssertFromString("f9ce82f59e5908d2d70e18df9679b469"),
+                       Size:  31367794,
                        Hints: []string{"E53f903684239bcc114f7bf8ff9bd6089f33058db@5441920c"}})
 }
index 53e8f756310a345ed3c85eae379f11a7f899107c..13e1c6a488401f0a652436744e4f85ae2c22a00f 100644 (file)
@@ -1,12 +1,12 @@
 <project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
   xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
   <modelVersion>4.0.0</modelVersion>
-  <groupId>org.arvados.sdk.java</groupId>
-  <artifactId>java</artifactId>
+  <groupId>org.arvados.sdk</groupId>
+  <artifactId>arvados</artifactId>
   <packaging>jar</packaging>
-  <version>1.0-SNAPSHOT</version>
-  <name>java</name>
-  <url>http://maven.apache.org</url>
+  <version>1.1</version>
+  <name>arvados-sdk</name>
+  <url>http://arvados.org</url>
 
   <dependencies>
     <dependency>
@@ -48,7 +48,7 @@
   </dependencies>
 
   <build>
-    <finalName>arvados-sdk-1.0</finalName>
+    <finalName>arvados-sdk-1.1</finalName>
 
     <plugins>
       <plugin>
similarity index 96%
rename from sdk/java/src/main/java/org/arvados/sdk/java/Arvados.java
rename to sdk/java/src/main/java/org/arvados/sdk/Arvados.java
index 2c03639a0af17d3e421dfe070fdb72a8cda82595..bef97ad58cd507c64f07713818caef623b43b9a6 100644 (file)
@@ -1,8 +1,9 @@
-package org.arvados.sdk.java;
+package org.arvados.sdk;
 
 import com.google.api.client.http.javanet.*;
 import com.google.api.client.http.ByteArrayContent;
 import com.google.api.client.http.GenericUrl;
+import com.google.api.client.http.HttpBackOffIOExceptionHandler;
 import com.google.api.client.http.HttpContent;
 import com.google.api.client.http.HttpRequest;
 import com.google.api.client.http.HttpRequestFactory;
@@ -10,6 +11,7 @@ import com.google.api.client.http.HttpTransport;
 import com.google.api.client.http.UriTemplate;
 import com.google.api.client.json.JsonFactory;
 import com.google.api.client.json.jackson2.JacksonFactory;
+import com.google.api.client.util.ExponentialBackOff;
 import com.google.api.client.util.Maps;
 import com.google.api.services.discovery.Discovery;
 import com.google.api.services.discovery.model.JsonSchema;
@@ -33,10 +35,10 @@ import org.json.simple.JSONObject;
 
 /**
  * This class provides a java SDK interface to Arvados API server.
- * 
+ *
  * Please refer to http://doc.arvados.org/api/ to learn about the
  *  various resources and methods exposed by the API server.
- *  
+ *
  * @author radhika
  */
 public class Arvados {
@@ -79,7 +81,7 @@ public class Arvados {
     if (host != null) {
       arvadosApiHost = host;
     } else {
-      arvadosApiHost = System.getenv().get("ARVADOS_API_HOST");      
+      arvadosApiHost = System.getenv().get("ARVADOS_API_HOST");
       if (arvadosApiHost == null) {
         throw new Exception("Missing environment variable: ARVADOS_API_HOST");
       }
@@ -120,7 +122,7 @@ public class Arvados {
     HashMap<String, Object> parameters = loadParameters(paramsMap, method);
 
     GenericUrl url = new GenericUrl(UriTemplate.expand(
-        arvadosRootUrl + restDescription.getBasePath() + method.getPath(), 
+        arvadosRootUrl + restDescription.getBasePath() + method.getPath(),
         parameters, true));
 
     try {
@@ -145,6 +147,12 @@ public class Arvados {
       HttpRequest request =
           requestFactory.buildRequest(method.getHttpMethod(), url, content);
 
+      // Set read timeout to 120 seconds (up from default of 20 seconds)
+      request.setReadTimeout(120 * 1000);
+
+      // Add retry behavior
+      request.setIOExceptionHandler(new HttpBackOffIOExceptionHandler(new ExponentialBackOff()));
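+      // (HttpBackOffIOExceptionHandler retries requests that fail with an
+      // IOException, pausing per the supplied ExponentialBackOff policy.)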
+
       // make the request
       List<String> authHeader = new ArrayList<String>();
       authHeader.add("OAuth2 " + arvadosApiToken);
@@ -212,7 +220,7 @@ public class Arvados {
               if (Boolean.TRUE.equals(required)) {
                 requiredParameters.add(property);
               } else {
-                optionalParameters.add(property);                
+                optionalParameters.add(property);
               }
             }
           }
@@ -282,7 +290,7 @@ public class Arvados {
     Map<String, RestMethod> methodMap = getMatchingMethodMap(resourceName);
 
     if (methodName == null) {
-      error("missing method name");      
+      error("missing method name");
     }
 
     RestMethod method =
@@ -297,7 +305,7 @@ public class Arvados {
   private Map<String, RestMethod> getMatchingMethodMap(String resourceName)
       throws Exception {
     if (resourceName == null) {
-      error("missing resource name");      
+      error("missing resource name");
     }
 
     Map<String, RestMethod> methodMap = null;
similarity index 94%
rename from sdk/java/src/main/java/org/arvados/sdk/java/MethodDetails.java
rename to sdk/java/src/main/java/org/arvados/sdk/MethodDetails.java
index 247924663c5ed0edbf4cdbf32a91e7ba8dbd6a48..d77b7f1e9a12d7a7d741035f349652e1a5528370 100644 (file)
@@ -1,4 +1,4 @@
-package org.arvados.sdk.java;
+package org.arvados.sdk;
 
 import com.google.api.client.util.Lists;
 import com.google.api.client.util.Sets;
@@ -19,4 +19,4 @@ public class MethodDetails implements Comparable<MethodDetails> {
       }
       return name.compareTo(o.name);
     }
-}
\ No newline at end of file
+}
diff --git a/sdk/pam/.dockerignore b/sdk/pam/.dockerignore
new file mode 100644 (file)
index 0000000..86ec754
--- /dev/null
@@ -0,0 +1,6 @@
+*~
+*.pyc
+.eggs
+*.egg_info
+build
+tmp
diff --git a/sdk/pam/.gitignore b/sdk/pam/.gitignore
new file mode 120000 (symlink)
index 0000000..1399fd4
--- /dev/null
@@ -0,0 +1 @@
+../python/.gitignore
\ No newline at end of file
diff --git a/sdk/pam/Dockerfile b/sdk/pam/Dockerfile
new file mode 100644 (file)
index 0000000..5cee5cc
--- /dev/null
@@ -0,0 +1,52 @@
+# These tests assume you have a real API server running on the docker host.
+#
+# Build the test container:
+#   First, replace 3000 below with your api server's port number if necessary.
+#   host$ python setup.py sdist rotate --keep=1 --match .tar.gz
+#   host$ docker build --tag=arvados/pam_test .
+#
+# Automated integration test:
+#   host$ docker run -it --add-host zzzzz.arvadosapi.com:"$(hostname -I |awk '{print $1}')" arvados/pam_test
+# You should see "=== OK ===", followed by a Perl stack trace due to a
+# yet-unidentified pam_python.so bug.
+#
+# Manual integration test:
+#   host$ docker run -it --add-host zzzzz.arvadosapi.com:"$(hostname -I |awk '{print $1}')" arvados/pam_test bash -c 'rsyslogd & tail -F /var/log/auth.log & sleep 1 & bash'
+#   container# login
+#   login: active
+#   Arvados API token: 3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi
+# You should now be logged in to the "active" shell account in the
+# container. You should also see arvados_pam log entries in
+# /var/log/auth.log (and in your terminal, thanks to "tail -F").
+
+FROM debian:wheezy
+RUN apt-get update
+RUN apt-get -qy dist-upgrade
+RUN apt-get -qy install python python-virtualenv libpam-python rsyslog
+# Packages required by pycurl, ciso8601
+RUN apt-get -qy install libcurl4-gnutls-dev python2.7-dev
+
+# for jessie (which also has other snags)
+# RUN apt-get -qy install python-pip libgnutls28-dev
+
+RUN pip install --upgrade setuptools
+RUN pip install python-pam
+ADD dist /dist
+RUN pip install /dist/arvados-pam-*.tar.gz
+
+# Configure and enable the module (hopefully vendor packages will offer a neater way)
+RUN perl -pi -e 's{api.example}{zzzzz.arvadosapi.com:3000}; s{shell\.example}{testvm2.shell insecure};' /usr/share/pam-configs/arvados
+RUN DEBIAN_FRONTEND=noninteractive pam-auth-update arvados --remove unix
+
+# Add a user account matching the fixture
+RUN useradd -ms /bin/bash active
+
+# Test with python (SIGSEGV during tests)
+#ADD . /pam
+#WORKDIR /pam
+#CMD rsyslogd & tail -F /var/log/auth.log & python setup.py test --test-suite integration_tests
+
+# Test with perl (SIGSEGV when program exits)
+RUN apt-get install -qy libauthen-pam-perl
+ADD tests/integration_test.pl /integration_test.pl
+CMD rsyslogd & tail -F /var/log/auth.log & sleep 1 && /integration_test.pl
diff --git a/sdk/pam/MANIFEST.in b/sdk/pam/MANIFEST.in
new file mode 100644 (file)
index 0000000..c17568c
--- /dev/null
@@ -0,0 +1,4 @@
+include README.rst
+include examples/shellinabox
+include lib/libpam_arvados.py
+include pam-configs/arvados
diff --git a/sdk/pam/README.rst b/sdk/pam/README.rst
new file mode 100644 (file)
index 0000000..fdf1f8e
--- /dev/null
@@ -0,0 +1,21 @@
+==================
+Arvados PAM Module
+==================
+
+Overview
+--------
+
+Accept Arvados API tokens to authenticate to shell accounts.
+
+.. _Arvados: https://arvados.org
+
+Installation
+------------
+
+See http://doc.arvados.org
+
+Testing and Development
+-----------------------
+
+https://arvados.org/projects/arvados/wiki/Hacking
+describes how to set up a development environment and run tests.
diff --git a/sdk/pam/arvados_pam/__init__.py b/sdk/pam/arvados_pam/__init__.py
new file mode 100644 (file)
index 0000000..087ea2e
--- /dev/null
@@ -0,0 +1,51 @@
+import sys
+sys.argv=['']
+
+from . import auth_event
+
+def pam_sm_authenticate(pamh, flags, argv):
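+    # argv comes from the PAM config line (see pam-configs/arvados in
+    # this change): argv[0] is the module path, argv[1] the API host,
+    # argv[2] the VM hostname, and any further args set boolean flags
+    # such as "noprompt" or "insecure".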
+    config = {}
+    config['arvados_api_host'] = argv[1]
+    config['virtual_machine_hostname'] = argv[2]
+    if len(argv) > 3:
+        for k in argv[3:]:
+            config[k] = True
+
+    try:
+        username = pamh.get_user(None)
+    except pamh.exception as e:
+        return e.pam_result
+
+    if not username:
+        return pamh.PAM_USER_UNKNOWN
+
+    try:
+        prompt = '' if config.get('noprompt') else 'Arvados API token: '
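+        # PAM_PROMPT_ECHO_OFF requests a hidden (no-echo) response from
+        # the conversation function; the reply is the API token.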
+        token = pamh.conversation(pamh.Message(pamh.PAM_PROMPT_ECHO_OFF, prompt)).resp
+    except pamh.exception as e:
+        return e.pam_result
+
+    if auth_event.AuthEvent(
+            config=config,
+            service=pamh.service,
+            client_host=pamh.rhost,
+            username=username,
+            token=token).can_login():
+        return pamh.PAM_SUCCESS
+    else:
+        return pamh.PAM_AUTH_ERR
+
+def pam_sm_setcred(pamh, flags, argv):
+    return pamh.PAM_SUCCESS
+
+def pam_sm_acct_mgmt(pamh, flags, argv):
+    return pamh.PAM_SUCCESS
+
+def pam_sm_open_session(pamh, flags, argv):
+    return pamh.PAM_SUCCESS
+
+def pam_sm_close_session(pamh, flags, argv):
+    return pamh.PAM_SUCCESS
+
+def pam_sm_chauthtok(pamh, flags, argv):
+    return pamh.PAM_SUCCESS
diff --git a/sdk/pam/arvados_pam/auth_event.py b/sdk/pam/arvados_pam/auth_event.py
new file mode 100644 (file)
index 0000000..8abd9c5
--- /dev/null
@@ -0,0 +1,88 @@
+import arvados
+import syslog
+
+def auth_log(msg):
+    """Log an authentication result to syslogd"""
+    syslog.openlog(facility=syslog.LOG_AUTH)
+    syslog.syslog('arvados_pam: ' + msg)
+    syslog.closelog()
+
+class AuthEvent(object):
+    def __init__(self, config, service, client_host, username, token):
+        self.config = config
+        self.service = service
+        self.client_host = client_host
+        self.username = username
+        self.token = token
+
+        self.api_host = None
+        self.vm_uuid = None
+        self.user = None
+
+    def can_login(self):
+        """Return truthy IFF credentials should be accepted."""
+        ok = False
+        try:
+            self.api_host = self.config['arvados_api_host']
+            self.arv = arvados.api('v1', host=self.api_host, token=self.token,
+                                   insecure=self.config.get('insecure', False),
+                                   cache=False)
+
+            vmname = self.config['virtual_machine_hostname']
+            vms = self.arv.virtual_machines().list(filters=[['hostname','=',vmname]]).execute()
+            if vms['items_available'] > 1:
+                raise Exception("lookup hostname %s returned %d records" % (vmname, vms['items_available']))
+            if vms['items_available'] == 0:
+                raise Exception("lookup hostname %s not found" % vmname)
+            vm = vms['items'][0]
+            if vm['hostname'] != vmname:
+                raise Exception("lookup hostname %s returned hostname %s" % (vmname, vm['hostname']))
+            self.vm_uuid = vm['uuid']
+
+            self.user = self.arv.users().current().execute()
+
+            filters = [
+                ['link_class','=','permission'],
+                ['name','=','can_login'],
+                ['head_uuid','=',self.vm_uuid],
+                ['tail_uuid','=',self.user['uuid']]]
+            for l in self.arv.links().list(filters=filters, limit=10000).execute()['items']:
+                if (l['properties']['username'] == self.username and
+                    l['tail_uuid'] == self.user['uuid'] and
+                    l['head_uuid'] == self.vm_uuid and
+                    l['link_class'] == 'permission' and
+                    l['name'] == 'can_login'):
+                    return self._report(True)
+
+            return self._report(False)
+
+        except Exception as e:
+            return self._report(e)
+
+    def _report(self, result):
+        """Log the result. Return truthy IFF result is True.
+
+        result must be True, False, or an exception.
+        """
+        self.result = result
+        auth_log(self.message())
+        return result == True
+
+    def message(self):
+        """Return a log message describing the event and its outcome."""
+        if isinstance(self.result, Exception):
+            outcome = 'Error: ' + repr(self.result)
+        elif self.result == True:
+            outcome = 'Allow'
+        else:
+            outcome = 'Deny'
+
+        if len(self.token) > 40:
+            log_token = self.token[0:15]
+        else:
+            log_token = '<invalid>'
+
+        log_label = [self.service, self.api_host, self.vm_uuid, self.client_host, self.username, log_token]
+        if self.user:
+            log_label += [self.user.get('uuid'), self.user.get('full_name')]
+        return str(log_label) + ': ' + outcome
diff --git a/sdk/pam/examples/shellinabox b/sdk/pam/examples/shellinabox
new file mode 100644 (file)
index 0000000..2d91ccb
--- /dev/null
@@ -0,0 +1,27 @@
+# This example is a stock debian "login" file with libpam_arvados
+# replacing pam_unix, and the "noprompt" option in use. It can be
+# installed as /etc/pam.d/shellinabox .
+
+auth       optional   pam_faildelay.so  delay=3000000
+auth [success=ok new_authtok_reqd=ok ignore=ignore user_unknown=bad default=die] pam_securetty.so
+auth       requisite  pam_nologin.so
+session [success=ok ignore=ignore module_unknown=ignore default=bad] pam_selinux.so close
+session       required   pam_env.so readenv=1
+session       required   pam_env.so readenv=1 envfile=/etc/default/locale
+
+auth [success=1 default=ignore] pam_python.so /usr/local/lib/security/libpam_arvados.py api.example shell.example noprompt
+auth   requisite                       pam_deny.so
+auth   required                        pam_permit.so
+
+auth       optional   pam_group.so
+session    required   pam_limits.so
+session    optional   pam_lastlog.so
+session    optional   pam_motd.so  motd=/run/motd.dynamic
+session    optional   pam_motd.so
+session    optional   pam_mail.so standard
+
+@include common-account
+@include common-session
+@include common-password
+
+session [success=ok ignore=ignore module_unknown=ignore default=bad] pam_selinux.so open
diff --git a/sdk/pam/fpm-info.sh b/sdk/pam/fpm-info.sh
new file mode 100644 (file)
index 0000000..35f3d5a
--- /dev/null
@@ -0,0 +1,15 @@
+case "$TARGET" in
+    debian* | ubuntu*)
+        fpm_depends+=('libpam-python')
+        ;;
+    *)
+        echo >&2 "ERROR: $PACKAGE: pam_python.so dependency unavailable in $TARGET."
+        return 1
+        ;;
+esac
+
+case "$FORMAT" in
+    deb)
+        fpm_args+=('--deb-recommends=system-log-daemon')
+        ;;
+esac
diff --git a/sdk/pam/gittaggers.py b/sdk/pam/gittaggers.py
new file mode 120000 (symlink)
index 0000000..d59c02c
--- /dev/null
@@ -0,0 +1 @@
+../python/gittaggers.py
\ No newline at end of file
diff --git a/sdk/pam/integration_tests/__init__.py b/sdk/pam/integration_tests/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/sdk/pam/integration_tests/test_pam.py b/sdk/pam/integration_tests/test_pam.py
new file mode 100644 (file)
index 0000000..cfc915c
--- /dev/null
@@ -0,0 +1,26 @@
+"""These tests assume we are running (in a docker container) with
+arvados_pam configured and a test API server running.
+"""
+import pam
+import unittest
+
+# From services/api/test/fixtures/api_client_authorizations.yml
+# because that file is not available during integration tests:
+ACTIVE_TOKEN = '3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi'
+SPECTATOR_TOKEN = 'zw2f4gwx8hw8cjre7yp6v1zylhrhn3m5gvjq73rtpwhmknrybu'
+
+class IntegrationTest(unittest.TestCase):
+    def setUp(self):
+        self.p = pam.pam()
+
+    def test_allow(self):
+        self.assertTrue(self.p.authenticate('active', ACTIVE_TOKEN, service='login'))
+
+    def test_deny_bad_token(self):
+        self.assertFalse(self.p.authenticate('active', 'thisisaverybadtoken', service='login'))
+
+    def test_deny_empty_token(self):
+        self.assertFalse(self.p.authenticate('active', '', service='login'))
+
+    def test_deny_permission(self):
+        self.assertFalse(self.p.authenticate('spectator', SPECTATOR_TOKEN, service='login'))
diff --git a/sdk/pam/lib/libpam_arvados.py b/sdk/pam/lib/libpam_arvados.py
new file mode 100644 (file)
index 0000000..deead7e
--- /dev/null
@@ -0,0 +1 @@
+from arvados_pam import *
diff --git a/sdk/pam/pam-configs/arvados b/sdk/pam/pam-configs/arvados
new file mode 100644 (file)
index 0000000..086e176
--- /dev/null
@@ -0,0 +1,14 @@
+# 1. Change "api.example" to your ARVADOS_API_HOST
+# 2. Change "shell.example" to this host's hostname
+#    (as it appears in the Arvados virtual_machines list)
+# 3. Install in /usr/share/pam-configs/arvados
+# 4. Run `pam-auth-update arvados`
+
+Name: Arvados authentication
+Default: yes
+Priority: 256
+Auth-Type: Primary
+Auth:
+       [success=end default=ignore]    pam_python.so /usr/local/lib/security/libpam_arvados.py api.example shell.example
+Auth-Initial:
+       [success=end default=ignore]    pam_python.so /usr/local/lib/security/libpam_arvados.py api.example shell.example
diff --git a/sdk/pam/setup.py b/sdk/pam/setup.py
new file mode 100755 (executable)
index 0000000..4915388
--- /dev/null
@@ -0,0 +1,46 @@
+#!/usr/bin/env python
+
+import os
+import sys
+import setuptools.command.egg_info as egg_info_cmd
+import subprocess
+
+from setuptools import setup, find_packages
+
+SETUP_DIR = os.path.dirname(__file__) or '.'
+README = os.path.join(SETUP_DIR, 'README.rst')
+
+tagger = egg_info_cmd.egg_info
+try:
+    import gittaggers
+    tagger = gittaggers.EggInfoFromGit
+except (ImportError, OSError):
+    pass
+
+setup(name='arvados-pam',
+      version='0.1',
+      description='Arvados PAM module',
+      long_description=open(README).read(),
+      author='Arvados',
+      author_email='info@arvados.org',
+      url='https://arvados.org',
+      download_url='https://github.com/curoverse/arvados.git',
+      license='Apache 2.0',
+      packages=[
+          'arvados_pam',
+      ],
+      scripts=[
+      ],
+      data_files=[
+          ('lib/security', ['lib/libpam_arvados.py']),
+          ('share/arvados-pam', ['examples/shellinabox']),
+          ('share/pam-configs', ['pam-configs/arvados']),
+      ],
+      install_requires=[
+          'arvados-python-client>=0.1.20150801000000',
+      ],
+      test_suite='tests',
+      tests_require=['mock>=1.0', 'python-pam'],
+      zip_safe=False,
+      cmdclass={'egg_info': tagger},
+      )
diff --git a/sdk/pam/tests/__init__.py b/sdk/pam/tests/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/sdk/pam/tests/integration_test.pl b/sdk/pam/tests/integration_test.pl
new file mode 100755 (executable)
index 0000000..e5dff1e
--- /dev/null
@@ -0,0 +1,46 @@
+#!/usr/bin/env perl
+
+$ENV{ARVADOS_API_HOST_INSECURE} = 1;
+use Authen::PAM qw(:constants);
+
+for my $case (['good', 1, 'active', '3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi'],
+              ['badtoken', 0, 'active', 'badtokenmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi'],
+              ['badusername', 0, 'baduser', '3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi']) {
+    dotest(@$case);
+}
+print "=== OK ===\n";
+
+sub dotest {
+    my ($label, $expect_ok, $user, $token) = @_;
+    print "$label: ";
+    my $service_name = 'login';
+    $main::Token = $token;
+    my $pamh = new Authen::PAM($service_name, $user, \&token_conv_func);
+    ref($pamh) || die "Error code $pamh during PAM init!";
+    $pamh->pam_set_item(PAM_RHOST(), '::1');
+    $pamh->pam_set_item(PAM_RUSER(), 'none');
+    $pamh->pam_set_item(PAM_TTY(), '/dev/null');
+    my $flags = PAM_SILENT();
+    $res = $pamh->pam_authenticate($flags);
+    $msg = $pamh->pam_strerror($res);
+    print "Result (code $res): $msg\n";
+    if (($res == 0) != ($expect_ok == 1)) {
+        die "*** FAIL ***\n";
+    }
+}
+
+sub token_conv_func {
+    my @res;
+    while ( @_ ) {
+        my $code = shift;
+        my $msg = shift;
+        my $ans;
+        print "Message (type $code): $msg\n";
+        if ($code == PAM_PROMPT_ECHO_OFF() || $code == PAM_PROMPT_ECHO_ON()) {
+            $ans = $main::Token;
+        }
+        push @res, (0,$ans);
+    }
+    push @res, PAM_SUCCESS();
+    return @res;
+}
diff --git a/sdk/pam/tests/mocker.py b/sdk/pam/tests/mocker.py
new file mode 100644 (file)
index 0000000..76c1ea3
--- /dev/null
@@ -0,0 +1,59 @@
+import mock
+import unittest
+
+class Mocker(unittest.TestCase):
+    ACTIVE_TOKEN = '3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi'
+
+    default_config = {
+        'arvados_api_host': 'zzzzz.api_host.example',
+        'virtual_machine_hostname': 'testvm2.shell',
+    }
+    default_request = {
+        'client_host': '::1',
+        'token': ACTIVE_TOKEN,
+        'username': 'active',
+    }
+    default_response = {
+        'links': {
+            'items': [{
+                'uuid': 'zzzzz-o0j2j-rah2ya1ohx9xaev',
+                'tail_uuid': 'zzzzz-tpzed-xurymjxw79nv3jz',
+                'head_uuid': 'zzzzz-2x53u-382brsig8rp3065',
+                'link_class': 'permission',
+                'name': 'can_login',
+                'properties': {
+                    'username': 'active',
+                },
+            }],
+        },
+        'users': {
+            'uuid': 'zzzzz-tpzed-xurymjxw79nv3jz',
+            'full_name': 'Active User',
+        },
+        'virtual_machines': {
+            'items': [{
+                'uuid': 'zzzzz-2x53u-382brsig8rp3065',
+                'hostname': 'testvm2.shell',
+            }],
+            'items_available': 1,
+        },
+    }
+
+    def setUp(self):
+        self.config = self.default_config.copy()
+        self.request = self.default_request.copy()
+        self.response = self.default_response.copy()
+        self.api_client = mock.MagicMock(name='api_client')
+        self.api_client.users().current().execute.side_effect = lambda: self.response['users']
+        self.api_client.virtual_machines().list().execute.side_effect = lambda: self.response['virtual_machines']
+        self.api_client.links().list().execute.side_effect = lambda: self.response['links']
+        patcher = mock.patch('arvados.api')
+        self.api = patcher.start()
+        self.addCleanup(patcher.stop)
+        self.api.side_effect = [self.api_client]
+
+        self.syslogged = []
+        patcher = mock.patch('syslog.syslog')
+        self.syslog = patcher.start()
+        self.addCleanup(patcher.stop)
+        self.syslog.side_effect = lambda s: self.syslogged.append(s)
diff --git a/sdk/pam/tests/test_auth_event.py b/sdk/pam/tests/test_auth_event.py
new file mode 100644 (file)
index 0000000..3fb6d74
--- /dev/null
@@ -0,0 +1,95 @@
+import arvados_pam
+import re
+from . import mocker
+
+class AuthEventTest(mocker.Mocker):
+    def attempt(self):
+        return arvados_pam.auth_event.AuthEvent(config=self.config, service='test_service', **self.request).can_login()
+
+    def test_success(self):
+        self.assertTrue(self.attempt())
+
+        self.api_client.virtual_machines().list.assert_called_with(
+            filters=[['hostname','=',self.config['virtual_machine_hostname']]])
+        self.api.assert_called_with(
+            'v1',
+            host=self.config['arvados_api_host'], token=self.request['token'],
+            insecure=False,
+            cache=False)
+        self.assertEqual(1, len(self.syslogged))
+        for i in ['test_service',
+                  self.request['username'],
+                  self.config['arvados_api_host'],
+                  self.response['virtual_machines']['items'][0]['uuid']]:
+            self.assertRegexpMatches(self.syslogged[0], re.escape(i))
+        self.assertRegexpMatches(self.syslogged[0], re.escape(self.request['token'][0:15]), 'token prefix not logged')
+        self.assertNotRegexpMatches(self.syslogged[0], re.escape(self.request['token'][15:30]), 'too much token logged')
+
+    def test_fail_vm_lookup(self):
+        self.api_client.virtual_machines().list().execute.side_effect = Exception("Test-induced failure")
+        self.assertFalse(self.attempt())
+        self.assertRegexpMatches(self.syslogged[0], 'Test-induced failure')
+
+    def test_vm_hostname_not_found(self):
+        self.response['virtual_machines'] = {
+            'items': [],
+            'items_available': 0,
+        }
+        self.assertFalse(self.attempt())
+
+    def test_vm_hostname_ambiguous(self):
+        self.response['virtual_machines'] = {
+            'items': [
+                {
+                    'uuid': 'zzzzz-2x53u-382brsig8rp3065',
+                    'hostname': 'testvm2.shell',
+                },
+                {
+                    'uuid': 'zzzzz-2x53u-382brsig8rp3065',
+                    'hostname': 'testvm2.shell',
+                },
+            ],
+            'items_available': 2,
+        }
+        self.assertFalse(self.attempt())
+
+    def test_server_ignores_vm_filters(self):
+        self.response['virtual_machines'] = {
+            'items': [
+                {
+                    'uuid': 'zzzzz-2x53u-382brsig8rp3065',
+                    'hostname': 'testvm22.shell', # <-----
+                },
+            ],
+            'items_available': 1,
+        }
+        self.assertFalse(self.attempt())
+
+    def test_fail_user_lookup(self):
+        self.api_client.users().current().execute.side_effect = Exception("Test-induced failure")
+        self.assertFalse(self.attempt())
+
+    def test_fail_permission_check(self):
+        self.api_client.links().list().execute.side_effect = Exception("Test-induced failure")
+        self.assertFalse(self.attempt())
+
+    def test_no_login_permission(self):
+        self.response['links'] = {
+            'items': [],
+        }
+        self.assertFalse(self.attempt())
+
+    def test_server_ignores_permission_filters(self):
+        self.response['links'] = {
+            'items': [{
+                'uuid': 'zzzzz-o0j2j-rah2ya1ohx9xaev',
+                'tail_uuid': 'zzzzz-tpzed-xurymjxw79nv3jz',
+                'head_uuid': 'zzzzz-2x53u-382brsig8rp3065',
+                'link_class': 'permission',
+                'name': 'CANT_login', # <-----
+                'properties': {
+                    'username': 'active',
+                },
+            }],
+        }
+        self.assertFalse(self.attempt())
diff --git a/sdk/pam/tests/test_pam_sm.py b/sdk/pam/tests/test_pam_sm.py
new file mode 100644 (file)
index 0000000..a967493
--- /dev/null
@@ -0,0 +1,26 @@
+import arvados_pam
+import mock
+from . import mocker
+
+class PamSMTest(mocker.Mocker):
+    def attempt(self):
+        return arvados_pam.pam_sm_authenticate(self.pamh, 0, self.argv)
+
+    def test_success(self):
+        self.assertEqual(self.pamh.PAM_SUCCESS, self.attempt())
+
+    def test_bad_user(self):
+        self.pamh.get_user = mock.MagicMock(return_value='badusername')
+        self.assertEqual(self.pamh.PAM_AUTH_ERR, self.attempt())
+
+    def test_bad_vm(self):
+        self.argv[2] = 'testvm22.shell'
+        self.assertEqual(self.pamh.PAM_AUTH_ERR, self.attempt())
+
+    def setUp(self):
+        super(PamSMTest, self).setUp()
+        self.pamh = mock.MagicMock()
+        self.pamh.get_user = mock.MagicMock(return_value='active')
+        self.pamh.PAM_SUCCESS = 12345
+        self.pamh.PAM_AUTH_ERR = 54321
+        self.argv = [__file__, 'zzzzz.arvadosapi.com', 'testvm2.shell']
index 21e31ad8055ce27e84f303c538789da050c021f6..d676d37a8a7dfb54fbb0429a548b4e5b90cac6e4 100644 (file)
@@ -6,5 +6,10 @@ use ExtUtils::MakeMaker;
 
 WriteMakefile(
     NAME            => 'Arvados',
-    VERSION_FROM    => 'lib/Arvados.pm'
+    VERSION_FROM    => 'lib/Arvados.pm',
+    PREREQ_PM       => {
+        'JSON'     => 0,
+        'LWP'      => 0,
+        'Net::SSL' => 0,
+    },
 );
index 947b35fe0f5ac5bad581a7dbaf4ab1e28cb33809..83437b2adb9f7817ac0b5ee210cfdb6d50915b90 100644 (file)
@@ -2,6 +2,9 @@ import logging
 
 _logger = logging.getLogger('arvados.ranges')
 
+# Custom log level below 'debug', for extra-verbose range logging.
+RANGES_SPAM = 9
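+# (Used below as _logger.log(RANGES_SPAM, ...); enable it with
+# logging.getLogger('arvados.ranges').setLevel(RANGES_SPAM).)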
+
 class Range(object):
     def __init__(self, locator, range_start, range_size, segment_offset=0):
         self.locator = locator
@@ -18,7 +21,7 @@ class Range(object):
                 self.range_size == other.range_size and
                 self.segment_offset == other.segment_offset)
 
-def first_block(data_locators, range_start, range_size):
+def first_block(data_locators, range_start):
     block_start = 0L
 
     # range_start/block_start is the inclusive lower bound
@@ -65,7 +68,7 @@ class LocatorAndRange(object):
     def __repr__(self):
         return "LocatorAndRange(%r, %r, %r, %r)" % (self.locator, self.block_size, self.segment_offset, self.segment_size)
 
-def locators_and_ranges(data_locators, range_start, range_size):
+def locators_and_ranges(data_locators, range_start, range_size, limit=None):
     """Get blocks that are covered by a range.
 
     Returns a list of LocatorAndRange objects.
@@ -79,24 +82,31 @@ def locators_and_ranges(data_locators, range_start, range_size):
     :range_size:
       size of range
 
+    :limit:
+      Maximum number of segments to return, default None (unlimited).  The
+      result is truncated if more segments are needed to cover the range
+      than the limit allows.
+
     """
     if range_size == 0:
         return []
     resp = []
     range_end = range_start + range_size
 
-    i = first_block(data_locators, range_start, range_size)
+    i = first_block(data_locators, range_start)
     if i is None:
         return []
 
     # We should always start at the first segment due to the binary
     # search.
-    while i < len(data_locators):
+    while i < len(data_locators) and len(resp) != limit:
         dl = data_locators[i]
         block_start = dl.range_start
         block_size = dl.range_size
         block_end = block_start + block_size
-        _logger.debug(dl.locator, "range_start", range_start, "block_start", block_start, "range_end", range_end, "block_end", block_end)
+        _logger.log(RANGES_SPAM,
+            "%s range_start %s block_start %s range_end %s block_end %s",
+            dl.locator, range_start, block_start, range_end, block_end)
         if range_end <= block_start:
             # range ends before this block starts, so don't look at any more locators
             break
@@ -158,7 +168,7 @@ def replace_range(data_locators, new_range_start, new_range_size, new_locator, n
             data_locators.append(Range(new_locator, new_range_start, new_range_size, new_segment_offset))
         return
 
-    i = first_block(data_locators, new_range_start, new_range_size)
+    i = first_block(data_locators, new_range_start)
     if i is None:
         return
 
@@ -168,7 +178,10 @@ def replace_range(data_locators, new_range_start, new_range_size, new_locator, n
         dl = data_locators[i]
         old_segment_start = dl.range_start
         old_segment_end = old_segment_start + dl.range_size
-        _logger.debug(dl, "range_start", new_range_start, "segment_start", old_segment_start, "range_end", new_range_end, "segment_end", old_segment_end)
+        _logger.log(RANGES_SPAM,
+            "%s range_start %s segment_start %s range_end %s segment_end %s",
+            dl, new_range_start, old_segment_start, new_range_end,
+            old_segment_end)
         if new_range_end <= old_segment_start:
             # range ends before this segment starts, so don't look at any more locators
             break
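
A minimal sketch of the new `limit` argument and RANGES_SPAM log level in use; the segment layout and locator names below are illustrative, not from the source:

    import logging
    from arvados._ranges import Range, locators_and_ranges, RANGES_SPAM

    logging.basicConfig()
    logging.getLogger('arvados.ranges').setLevel(RANGES_SPAM)  # opt in to spam

    # Two adjacent 64-byte segments (hypothetical locators).
    segments = [Range('loc_a', 0, 64), Range('loc_b', 64, 64)]

    # A 64-byte read at offset 32 spans both segments, but limit=1
    # truncates the result to a single LocatorAndRange.
    covered = locators_and_ranges(segments, 32, 64, limit=1)
    assert len(covered) == 1
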
index 3968f0185e1a193a6b233c5033be4a0b7ad8794e..086487aa09714e2873e2b49ac7cafff222ea0d1b 100644 (file)
@@ -1,3 +1,4 @@
+import collections
 import httplib2
 import json
 import logging
@@ -14,6 +15,26 @@ import util
 
 _logger = logging.getLogger('arvados.api')
 
+class OrderedJsonModel(apiclient.model.JsonModel):
+    """Model class for JSON that preserves the contents' order.
+
+    API clients that care about preserving the order of fields in API
+    server responses can use this model to do so, like this::
+
+        from arvados.api import OrderedJsonModel
+        client = arvados.api('v1', ..., model=OrderedJsonModel())
+    """
+
+    def deserialize(self, content):
+        # This is a very slightly modified version of the parent class'
+        # implementation.  Copyright (c) 2010 Google.
+        content = content.decode('utf-8')
+        body = json.loads(content, object_pairs_hook=collections.OrderedDict)
+        if self._data_wrapper and isinstance(body, dict) and 'data' in body:
+            body = body['data']
+        return body
+
+
 def _intercept_http_request(self, uri, **kwargs):
     from httplib import BadStatusLine
 
@@ -69,7 +90,10 @@ def _new_http_error(cls, *args, **kwargs):
 apiclient_errors.HttpError.__new__ = staticmethod(_new_http_error)
 
 def http_cache(data_type):
-    path = os.environ['HOME'] + '/.cache/arvados/' + data_type
+    homedir = os.environ.get('HOME')
+    if not homedir:
+        return None
+    path = homedir + '/.cache/arvados/' + data_type
     try:
         util.mkdir_dash_p(path)
     except OSError:
@@ -134,11 +158,7 @@ def api(version=None, cache=True, host=None, token=None, insecure=False, **kwarg
             'https://%s/discovery/v1/apis/{api}/{apiVersion}/rest' % (host,))
 
     if 'http' not in kwargs:
-        http_kwargs = {}
-        # Prefer system's CA certificates (if available) over httplib2's.
-        certs_path = '/etc/ssl/certs/ca-certificates.crt'
-        if os.path.exists(certs_path):
-            http_kwargs['ca_certs'] = certs_path
+        http_kwargs = {'ca_certs': util.ca_certs_path()}
         if cache:
             http_kwargs['cache'] = http_cache('discovery')
         if insecure:
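
A minimal usage sketch for OrderedJsonModel, mirroring its docstring; note that after the http_cache() change, clients without $HOME simply run with caching disabled rather than crashing:

    import arvados
    from arvados.api import OrderedJsonModel

    # Responses are parsed into collections.OrderedDict, preserving the
    # field order sent by the API server.
    client = arvados.api('v1', model=OrderedJsonModel())
    user = client.users().current().execute()
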
index 53ae4a836c5965782413a0dd24db976ee1dfc28a..ce0e5e3564559c707825d9acad5717a3a5e42be0 100644 (file)
@@ -9,13 +9,19 @@ import Queue
 import copy
 import errno
 import re
+import logging
 
-from .errors import KeepWriteError, AssertionError
+from .errors import KeepWriteError, AssertionError, ArgumentError
 from .keep import KeepLocator
 from ._normalize_stream import normalize_stream
 from ._ranges import locators_and_ranges, replace_range, Range
 from .retry import retry_method
 
+MOD = "mod"
+WRITE = "write"
+
+_logger = logging.getLogger('arvados.arvfile')
+
 def split(path):
     """split(path) -> streamname, filename
 
@@ -77,7 +83,7 @@ class ArvadosFileReaderBase(_FileLikeObjectBase):
         return re.sub('\.(bz2|gz)$', '', self.name)
 
     @_FileLikeObjectBase._before_close
-    def seek(self, pos, whence=os.SEEK_CUR):
+    def seek(self, pos, whence=os.SEEK_SET):
         if whence == os.SEEK_CUR:
             pos += self._filepos
         elif whence == os.SEEK_END:
@@ -123,7 +129,7 @@ class ArvadosFileReaderBase(_FileLikeObjectBase):
     @_FileLikeObjectBase._before_close
     @retry_method
     def decompress(self, decompress, size, num_retries=None):
-        for segment in self.readall(size, num_retries):
+        for segment in self.readall(size, num_retries=num_retries):
             data = decompress(segment)
             if data:
                 yield data
@@ -230,6 +236,13 @@ def synchronized(orig_func):
             return orig_func(self, *args, **kwargs)
     return synchronized_wrapper
 
+
+class StateChangeError(Exception):
+    def __init__(self, message, state, nextstate):
+        super(StateChangeError, self).__init__(message)
+        self.state = state
+        self.nextstate = nextstate
+
 class _BufferBlock(object):
     """A stand-in for a Keep block that is in the process of being written.
 
@@ -253,6 +266,7 @@ class _BufferBlock(object):
     WRITABLE = 0
     PENDING = 1
     COMMITTED = 2
+    ERROR = 3
 
     def __init__(self, blockid, starting_capacity, owner):
         """
@@ -274,6 +288,8 @@ class _BufferBlock(object):
         self._locator = None
         self.owner = owner
         self.lock = threading.Lock()
+        self.wait_for_commit = threading.Event()
+        self.error = None
 
     @synchronized
     def append(self, data):
@@ -295,17 +311,30 @@ class _BufferBlock(object):
         else:
             raise AssertionError("Buffer block is not writable")
 
+    STATE_TRANSITIONS = frozenset([
+            (WRITABLE, PENDING),
+            (PENDING, COMMITTED),
+            (PENDING, ERROR),
+            (ERROR, PENDING)])
+
     @synchronized
-    def set_state(self, nextstate, loc=None):
-        if ((self._state == _BufferBlock.WRITABLE and nextstate == _BufferBlock.PENDING) or
-            (self._state == _BufferBlock.PENDING and nextstate == _BufferBlock.COMMITTED)):
-            self._state = nextstate
-            if self._state == _BufferBlock.COMMITTED:
-                self._locator = loc
-                self.buffer_view = None
-                self.buffer_block = None
-        else:
-            raise AssertionError("Invalid state change from %s to %s" % (self.state, state))
+    def set_state(self, nextstate, val=None):
+        if (self._state, nextstate) not in self.STATE_TRANSITIONS:
+            raise StateChangeError("Invalid state change from %s to %s" % (self.state, nextstate), self.state, nextstate)
+        self._state = nextstate
+
+        if self._state == _BufferBlock.PENDING:
+            self.wait_for_commit.clear()
+
+        if self._state == _BufferBlock.COMMITTED:
+            self._locator = val
+            self.buffer_view = None
+            self.buffer_block = None
+            self.wait_for_commit.set()
+
+        if self._state == _BufferBlock.ERROR:
+            self.error = val
+            self.wait_for_commit.set()
 
     @synchronized
     def state(self):
@@ -325,11 +354,17 @@ class _BufferBlock(object):
     @synchronized
     def clone(self, new_blockid, owner):
         if self._state == _BufferBlock.COMMITTED:
-            raise AssertionError("Can only duplicate a writable or pending buffer block")
+            raise AssertionError("Cannot duplicate committed buffer block")
         bufferblock = _BufferBlock(new_blockid, self.size(), owner)
         bufferblock.append(self.buffer_view[0:self.size()])
         return bufferblock
 
+    @synchronized
+    def clear(self):
+        self.owner = None
+        self.buffer_block = None
+        self.buffer_view = None
+
 
 class NoopLock(object):
     def __enter__(self):
@@ -349,7 +384,7 @@ def must_be_writable(orig_func):
     @functools.wraps(orig_func)
     def must_be_writable_wrapper(self, *args, **kwargs):
         if not self.writable():
-            raise IOError((errno.EROFS, "Collection must be writable."))
+            raise IOError(errno.EROFS, "Collection is read-only.")
         return orig_func(self, *args, **kwargs)
     return must_be_writable_wrapper
 
@@ -361,19 +396,22 @@ class _BlockManager(object):
     Collection of ArvadosFiles.
 
     """
+
+    DEFAULT_PUT_THREADS = 2
+    DEFAULT_GET_THREADS = 2
+
     def __init__(self, keep):
         """keep: KeepClient object to use"""
         self._keep = keep
         self._bufferblocks = {}
         self._put_queue = None
-        self._put_errors = None
         self._put_threads = None
         self._prefetch_queue = None
         self._prefetch_threads = None
         self.lock = threading.Lock()
         self.prefetch_enabled = True
-        self.num_put_threads = 2
-        self.num_get_threads = 2
+        self.num_put_threads = _BlockManager.DEFAULT_PUT_THREADS
+        self.num_get_threads = _BlockManager.DEFAULT_GET_THREADS
 
     @synchronized
     def alloc_bufferblock(self, blockid=None, starting_capacity=2**14, owner=None):
@@ -415,6 +453,70 @@ class _BlockManager(object):
     def is_bufferblock(self, locator):
         return locator in self._bufferblocks
 
+    def _commit_bufferblock_worker(self):
+        """Background uploader thread."""
+
+        while True:
+            try:
+                bufferblock = self._put_queue.get()
+                if bufferblock is None:
+                    return
+
+                loc = self._keep.put(bufferblock.buffer_view[0:bufferblock.write_pointer].tobytes())
+                bufferblock.set_state(_BufferBlock.COMMITTED, loc)
+
+            except Exception as e:
+                bufferblock.set_state(_BufferBlock.ERROR, e)
+            finally:
+                if self._put_queue is not None:
+                    self._put_queue.task_done()
+
+    @synchronized
+    def start_put_threads(self):
+        if self._put_threads is None:
+            # Start uploader threads.
+
+            # If we don't limit the Queue size, the upload queue can quickly
+            # grow to take up gigabytes of RAM if the writing process is
+            # generating data more quickly than it can be sent to the Keep
+            # servers.
+            #
+            # With two upload threads and a queue size of 2, this means up to 4
+            # blocks pending.  If they are full 64 MiB blocks, that means up to
+            # 256 MiB of internal buffering, which is the same size as the
+            # default download block cache in KeepClient.
+            self._put_queue = Queue.Queue(maxsize=2)
+
+            self._put_threads = []
+            for i in xrange(0, self.num_put_threads):
+                thread = threading.Thread(target=self._commit_bufferblock_worker)
+                self._put_threads.append(thread)
+                thread.daemon = False
+                thread.start()
+
+    def _block_prefetch_worker(self):
+        """The background downloader thread."""
+        while True:
+            try:
+                b = self._prefetch_queue.get()
+                if b is None:
+                    return
+                self._keep.get(b)
+            except Exception:
+                pass
+
+    @synchronized
+    def start_get_threads(self):
+        if self._prefetch_threads is None:
+            self._prefetch_queue = Queue.Queue()
+            self._prefetch_threads = []
+            for i in xrange(0, self.num_get_threads):
+                thread = threading.Thread(target=self._block_prefetch_worker)
+                self._prefetch_threads.append(thread)
+                thread.daemon = True
+                thread.start()
+
+
     @synchronized
     def stop_threads(self):
         """Shut down and wait for background upload and download threads to finish."""
@@ -426,7 +528,6 @@ class _BlockManager(object):
                 t.join()
         self._put_threads = None
         self._put_queue = None
-        self._put_errors = None
 
         if self._prefetch_threads is not None:
             for t in self._prefetch_threads:
@@ -436,62 +537,60 @@ class _BlockManager(object):
         self._prefetch_threads = None
         self._prefetch_queue = None
 
-    def commit_bufferblock(self, block):
-        """Initiate a background upload of a bufferblock.
+    def __enter__(self):
+        return self
 
-        This will block if the upload queue is at capacity, otherwise it will
-        return immediately.
+    def __exit__(self, exc_type, exc_value, traceback):
+        self.stop_threads()
 
-        """
+    def __del__(self):
+        self.stop_threads()
 
-        def commit_bufferblock_worker(self):
-            """Background uploader thread."""
+    def commit_bufferblock(self, block, sync):
+        """Initiate a background upload of a bufferblock.
 
-            while True:
-                try:
-                    bufferblock = self._put_queue.get()
-                    if bufferblock is None:
-                        return
-                    loc = self._keep.put(bufferblock.buffer_view[0:bufferblock.write_pointer].tobytes())
-                    bufferblock.set_state(_BufferBlock.COMMITTED, loc)
+        :block:
+          The block object to upload
 
-                except Exception as e:
-                    self._put_errors.put((bufferblock.locator(), e))
-                finally:
-                    if self._put_queue is not None:
-                        self._put_queue.task_done()
+        :sync:
+          If `sync` is True, upload the block synchronously.
+          If `sync` is False, upload the block asynchronously.  This will
+          return immediately unless the upload queue is at capacity, in
+          which case it will wait on an upload queue slot.
 
-        with self.lock:
-            if self._put_threads is None:
-                # Start uploader threads.
-
-                # If we don't limit the Queue size, the upload queue can quickly
-                # grow to take up gigabytes of RAM if the writing process is
-                # generating data more quickly than it can be send to the Keep
-                # servers.
-                #
-                # With two upload threads and a queue size of 2, this means up to 4
-                # blocks pending.  If they are full 64 MiB blocks, that means up to
-                # 256 MiB of internal buffering, which is the same size as the
-                # default download block cache in KeepClient.
-                self._put_queue = Queue.Queue(maxsize=2)
-                self._put_errors = Queue.Queue()
-
-                self._put_threads = []
-                for i in xrange(0, self.num_put_threads):
-                    thread = threading.Thread(target=commit_bufferblock_worker, args=(self,))
-                    self._put_threads.append(thread)
-                    thread.daemon = True
-                    thread.start()
-
-        # Mark the block as PENDING so to disallow any more appends.
-        block.set_state(_BufferBlock.PENDING)
-        self._put_queue.put(block)
+        """
+
+        try:
+            # Mark the block as PENDING to disallow any further appends.
+            block.set_state(_BufferBlock.PENDING)
+        except StateChangeError as e:
+            if e.state == _BufferBlock.PENDING and sync:
+                block.wait_for_commit.wait()
+                if block.state() == _BufferBlock.ERROR:
+                    raise block.error
+            return
+
+        if sync:
+            try:
+                loc = self._keep.put(block.buffer_view[0:block.write_pointer].tobytes())
+                block.set_state(_BufferBlock.COMMITTED, loc)
+            except Exception as e:
+                block.set_state(_BufferBlock.ERROR, e)
+                raise
+        else:
+            self.start_put_threads()
+            self._put_queue.put(block)
 
     @synchronized
     def get_bufferblock(self, locator):
         return self._bufferblocks.get(locator)
 
+    @synchronized
+    def delete_bufferblock(self, locator):
+        bb = self._bufferblocks[locator]
+        bb.clear()
+        del self._bufferblocks[locator]
+
     def get_block_contents(self, locator, num_retries, cache_only=False):
         """Fetch a block.
 
@@ -514,31 +613,34 @@ class _BlockManager(object):
     def commit_all(self):
         """Commit all outstanding buffer blocks.
 
-        Unlike commit_bufferblock(), this is a synchronous call, and will not
-        return until all buffer blocks are uploaded.  Raises
-        KeepWriteError() if any blocks failed to upload.
+        This is a synchronous call, and will not return until all buffer blocks
+        are uploaded.  Raises KeepWriteError() if any blocks failed to upload.
 
         """
         with self.lock:
             items = self._bufferblocks.items()
 
         for k,v in items:
-            if v.state() == _BufferBlock.WRITABLE:
-                self.commit_bufferblock(v)
+            if v.state() != _BufferBlock.COMMITTED and v.owner:
+                v.owner.flush(sync=False)
 
         with self.lock:
             if self._put_queue is not None:
                 self._put_queue.join()
 
-                if not self._put_errors.empty():
-                    err = []
-                    try:
-                        while True:
-                            err.append(self._put_errors.get(False))
-                    except Queue.Empty:
-                        pass
+                err = []
+                for k,v in items:
+                    if v.state() == _BufferBlock.ERROR:
+                        err.append((v.locator(), v.error))
+                if err:
                     raise KeepWriteError("Error writing some blocks", err, label="block")
 
+        for k,v in items:
+            # flush again with sync=True to remove committed bufferblocks from
+            # the segments.
+            if v.owner:
+                v.owner.flush(sync=True)
+
     def block_prefetch(self, locator):
         """Initiate a background download of a block.
 
@@ -552,28 +654,14 @@ class _BlockManager(object):
         if not self.prefetch_enabled:
             return
 
-        def block_prefetch_worker(self):
-            """The background downloader thread."""
-            while True:
-                try:
-                    b = self._prefetch_queue.get()
-                    if b is None:
-                        return
-                    self._keep.get(b)
-                except Exception:
-                    pass
+        if self._keep.get_from_cache(locator) is not None:
+            return
 
         with self.lock:
             if locator in self._bufferblocks:
                 return
-            if self._prefetch_threads is None:
-                self._prefetch_queue = Queue.Queue()
-                self._prefetch_threads = []
-                for i in xrange(0, self.num_get_threads):
-                    thread = threading.Thread(target=block_prefetch_worker, args=(self,))
-                    self._prefetch_threads.append(thread)
-                    thread.daemon = True
-                    thread.start()
+
+        self.start_get_threads()
         self._prefetch_queue.put(locator)
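
Illustrative only (these are internal classes): a sketch of driving the refactored upload path directly, using the new context-manager support so stop_threads() runs on exit; assumes a reachable API server and Keep services:

    import arvados
    from arvados.arvfile import _BufferBlock, _BlockManager

    keep = arvados.KeepClient(api_client=arvados.api('v1'))
    with _BlockManager(keep) as bm:           # __exit__ calls stop_threads()
        bb = bm.alloc_bufferblock()
        bb.append('some data')
        bm.commit_bufferblock(bb, sync=True)  # blocking upload to Keep
        assert bb.state() == _BufferBlock.COMMITTED
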
 
 
@@ -588,7 +676,7 @@ class ArvadosFile(object):
 
     """
 
-    def __init__(self, parent, stream=[], segments=[]):
+    def __init__(self, parent, name, stream=[], segments=[]):
         """
         ArvadosFile constructor.
 
@@ -599,7 +687,8 @@ class ArvadosFile(object):
           a list of Range objects representing segments
         """
         self.parent = parent
-        self._modified = True
+        self.name = name
+        self._committed = False
         self._segments = []
         self.lock = parent.root_collection().lock
         for s in segments:
@@ -614,9 +703,9 @@ class ArvadosFile(object):
         return copy.copy(self._segments)
 
     @synchronized
-    def clone(self, new_parent):
+    def clone(self, new_parent, new_name):
         """Make a copy of this file."""
-        cp = ArvadosFile(new_parent)
+        cp = ArvadosFile(new_parent, new_name)
         cp.replace_contents(self)
         return cp
 
@@ -640,7 +729,7 @@ class ArvadosFile(object):
 
             self._segments.append(Range(new_loc, other_segment.range_start, other_segment.range_size, other_segment.segment_offset))
 
-        self._modified = True
+        self._committed = False
 
     def __eq__(self, other):
         if other is self:
@@ -676,14 +765,14 @@ class ArvadosFile(object):
         return not self.__eq__(other)
 
     @synchronized
-    def set_unmodified(self):
-        """Clear the modified flag"""
-        self._modified = False
+    def set_committed(self):
+        """Set committed flag to False"""
+        self._committed = True
 
     @synchronized
-    def modified(self):
-        """Test the modified flag"""
-        return self._modified
+    def committed(self):
+        """Get whether this is committed or not."""
+        return self._committed
 
     @must_be_writable
     @synchronized
@@ -703,7 +792,7 @@ class ArvadosFile(object):
                     # segment is past the truncate size, all done
                     break
                 elif size < range_end:
-                    nr = Range(r.locator, r.range_start, size - r.range_start)
+                    nr = Range(r.locator, r.range_start, size - r.range_start, 0)
                     nr.segment_offset = r.segment_offset
                     new_segs.append(nr)
                     break
@@ -711,32 +800,44 @@ class ArvadosFile(object):
                     new_segs.append(r)
 
             self._segments = new_segs
-            self._modified = True
+            self._committed = False
         elif size > self.size():
-            raise IOError("truncate() does not support extending the file size")
+            raise IOError(errno.EINVAL, "truncate() does not support extending the file size")
 
-    def readfrom(self, offset, size, num_retries):
-        """Read upto `size` bytes from the file starting at `offset`."""
+    def readfrom(self, offset, size, num_retries, exact=False):
+        """Read up to `size` bytes from the file starting at `offset`.
+
+        :exact:
+         If False (default), return less data than requested if the read
+         crosses a block boundary and the next block isn't cached.  If True,
+         only return less data than requested when hitting EOF.
+        """
 
         with self.lock:
             if size == 0 or offset >= self.size():
                 return ''
-            prefetch = locators_and_ranges(self._segments, offset, size + config.KEEP_BLOCK_SIZE)
             readsegs = locators_and_ranges(self._segments, offset, size)
+            prefetch = locators_and_ranges(self._segments, offset + size, config.KEEP_BLOCK_SIZE, limit=32)
 
-        for lr in prefetch:
-            self.parent._my_block_manager().block_prefetch(lr.locator)
-
+        locs = set()
         data = []
         for lr in readsegs:
-            block = self.parent._my_block_manager().get_block_contents(lr.locator, num_retries=num_retries, cache_only=bool(data))
+            block = self.parent._my_block_manager().get_block_contents(lr.locator, num_retries=num_retries, cache_only=(bool(data) and not exact))
             if block:
-                data.append(block[lr.segment_offset:lr.segment_offset+lr.segment_size])
+                blockview = memoryview(block)
+                data.append(blockview[lr.segment_offset:lr.segment_offset+lr.segment_size].tobytes())
+                locs.add(lr.locator)
             else:
                 break
+
+        for lr in prefetch:
+            if lr.locator not in locs:
+                self.parent._my_block_manager().block_prefetch(lr.locator)
+                locs.add(lr.locator)
+
         return ''.join(data)
 
-    def _repack_writes(self):
+    def _repack_writes(self, num_retries):
         """Test if the buffer block has more data than actual segments.
 
         This happens when a buffered write over-writes a file range written in
@@ -754,9 +855,10 @@ class ArvadosFile(object):
         if write_total < self._current_bblock.size():
             # There is more data in the buffer block than is actually accounted for by segments, so
             # re-pack into a new buffer by copying over to a new buffer block.
+            contents = self.parent._my_block_manager().get_block_contents(self._current_bblock.blockid, num_retries)
             new_bb = self.parent._my_block_manager().alloc_bufferblock(self._current_bblock.blockid, starting_capacity=write_total, owner=self)
             for t in bufferblock_segs:
-                new_bb.append(self._current_bblock.buffer_view[t.segment_offset:t.segment_offset+t.range_size].tobytes())
+                new_bb.append(contents[t.segment_offset:t.segment_offset+t.range_size])
                 t.segment_offset = new_bb.size() - t.range_size
 
             self._current_bblock = new_bb
@@ -777,28 +879,63 @@ class ArvadosFile(object):
             raise ArgumentError("Offset is past the end of the file")
 
         if len(data) > config.KEEP_BLOCK_SIZE:
-            raise ArgumentError("Please append data in chunks smaller than %i bytes (config.KEEP_BLOCK_SIZE)" % (config.KEEP_BLOCK_SIZE))
+            # Chunk it up into smaller writes
+            n = 0
+            dataview = memoryview(data)
+            while n < len(data):
+                self.writeto(offset+n, dataview[n:n + config.KEEP_BLOCK_SIZE].tobytes(), num_retries)
+                n += config.KEEP_BLOCK_SIZE
+            return
 
-        self._modified = True
+        self._committed = False
 
         if self._current_bblock is None or self._current_bblock.state() != _BufferBlock.WRITABLE:
             self._current_bblock = self.parent._my_block_manager().alloc_bufferblock(owner=self)
 
         if (self._current_bblock.size() + len(data)) > config.KEEP_BLOCK_SIZE:
-            self._repack_writes()
+            self._repack_writes(num_retries)
             if (self._current_bblock.size() + len(data)) > config.KEEP_BLOCK_SIZE:
-                self.parent._my_block_manager().commit_bufferblock(self._current_bblock)
+                self.parent._my_block_manager().commit_bufferblock(self._current_bblock, sync=False)
                 self._current_bblock = self.parent._my_block_manager().alloc_bufferblock(owner=self)
 
         self._current_bblock.append(data)
 
         replace_range(self._segments, offset, len(data), self._current_bblock.blockid, self._current_bblock.write_pointer - len(data))
 
+        self.parent.notify(WRITE, self.parent, self.name, (self, self))
+
+        return len(data)
+
     @synchronized
-    def flush(self):
-        if self._current_bblock:
-            self._repack_writes()
-            self.parent._my_block_manager().commit_bufferblock(self._current_bblock)
+    def flush(self, sync=True, num_retries=0):
+        """Flush the current bufferblock to Keep.
+
+        :sync:
+          If True, commit block synchronously, wait until buffer block has been written.
+          If False, commit block asynchronously, return immediately after putting block into
+          the keep put queue.
+        """
+        if self.committed():
+            return
+
+        if self._current_bblock and self._current_bblock.state() != _BufferBlock.COMMITTED:
+            if self._current_bblock.state() == _BufferBlock.WRITABLE:
+                self._repack_writes(num_retries)
+            self.parent._my_block_manager().commit_bufferblock(self._current_bblock, sync=sync)
+
+        if sync:
+            to_delete = set()
+            for s in self._segments:
+                bb = self.parent._my_block_manager().get_bufferblock(s.locator)
+                if bb:
+                    if bb.state() != _BufferBlock.COMMITTED:
+                        self.parent._my_block_manager().commit_bufferblock(bb, sync=True)
+                    to_delete.add(s.locator)
+                    s.locator = bb.locator()
+            for s in to_delete:
+                self.parent._my_block_manager().delete_bufferblock(s)
+
+        self.parent.notify(MOD, self.parent, self.name, (self, self))
 
     @must_be_writable
     @synchronized
@@ -813,9 +950,9 @@ class ArvadosFile(object):
 
     def _add_segment(self, blocks, pos, size):
         """Internal implementation of add_segment."""
-        self._modified = True
+        self._committed = False
         for lr in locators_and_ranges(blocks, pos, size):
-            last = self._segments[-1] if self._segments else Range(0, 0, 0)
+            last = self._segments[-1] if self._segments else Range(0, 0, 0, 0)
             r = Range(lr.locator, last.range_start+last.range_size, lr.segment_size, lr.segment_offset)
             self._segments.append(r)
 
@@ -844,6 +981,16 @@ class ArvadosFile(object):
         buf += "\n"
         return buf
 
+    @must_be_writable
+    @synchronized
+    def _reparent(self, newparent, newname):
+        self._committed = False
+        self.flush(sync=True)
+        self.parent.remove(self.name)
+        self.parent = newparent
+        self.name = newname
+        self.lock = self.parent.root_collection().lock
+
 
 class ArvadosFileReader(ArvadosFileReaderBase):
     """Wraps ArvadosFile in a file-like object supporting reading only.
@@ -853,8 +1000,8 @@ class ArvadosFileReader(ArvadosFileReaderBase):
 
     """
 
-    def __init__(self, arvadosfile, name, mode="r", num_retries=None):
-        super(ArvadosFileReader, self).__init__(name, mode, num_retries=num_retries)
+    def __init__(self, arvadosfile, num_retries=None):
+        super(ArvadosFileReader, self).__init__(arvadosfile.name, "r", num_retries=num_retries)
         self.arvadosfile = arvadosfile
 
     def size(self):
@@ -865,16 +1012,32 @@ class ArvadosFileReader(ArvadosFileReaderBase):
 
     @_FileLikeObjectBase._before_close
     @retry_method
-    def read(self, size, num_retries=None):
-        """Read up to `size` bytes from the stream, starting at the current file position."""
-        data = self.arvadosfile.readfrom(self._filepos, size, num_retries)
-        self._filepos += len(data)
-        return data
+    def read(self, size=None, num_retries=None):
+        """Read up to `size` bytes from the file and return the result.
+
+        Starts at the current file position.  If `size` is None, read the
+        entire remainder of the file.
+        """
+        if size is None:
+            data = []
+            rd = self.arvadosfile.readfrom(self._filepos, config.KEEP_BLOCK_SIZE, num_retries)
+            while rd:
+                data.append(rd)
+                self._filepos += len(rd)
+                rd = self.arvadosfile.readfrom(self._filepos, config.KEEP_BLOCK_SIZE, num_retries)
+            return ''.join(data)
+        else:
+            data = self.arvadosfile.readfrom(self._filepos, size, num_retries, exact=True)
+            self._filepos += len(data)
+            return data
 
     @_FileLikeObjectBase._before_close
     @retry_method
     def readfrom(self, offset, size, num_retries=None):
-        """Read up to `size` bytes from the stream, starting at the current file position."""
+        """Read up to `size` bytes from the stream, starting at the specified file offset.
+
+        This method does not change the file position.
+        """
         return self.arvadosfile.readfrom(offset, size, num_retries)
 
     def flush(self):
@@ -889,8 +1052,9 @@ class ArvadosFileWriter(ArvadosFileReader):
 
     """
 
-    def __init__(self, arvadosfile, name, mode, num_retries=None):
-        super(ArvadosFileWriter, self).__init__(arvadosfile, name, mode, num_retries=num_retries)
+    def __init__(self, arvadosfile, mode, num_retries=None):
+        super(ArvadosFileWriter, self).__init__(arvadosfile, num_retries=num_retries)
+        self.mode = mode
 
     @_FileLikeObjectBase._before_close
     @retry_method
@@ -900,12 +1064,13 @@ class ArvadosFileWriter(ArvadosFileReader):
         else:
             self.arvadosfile.writeto(self._filepos, data, num_retries)
             self._filepos += len(data)
+        return len(data)
 
     @_FileLikeObjectBase._before_close
     @retry_method
     def writelines(self, seq, num_retries=None):
         for s in seq:
-            self.write(s, num_retries)
+            self.write(s, num_retries=num_retries)
 
     @_FileLikeObjectBase._before_close
     def truncate(self, size=None):
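
Taken together, the reader/writer changes above might be exercised like this (a sketch; the file name is hypothetical, and Collection.open() comes from the collection changes in the next file):

    import arvados.collection

    c = arvados.collection.Collection()
    with c.open('hello.txt', 'w') as f:
        n = f.write('hello world')   # write() now returns the byte count
    with c.open('hello.txt', 'r') as f:
        f.seek(6)                    # seek() now defaults to os.SEEK_SET
        rest = f.read()              # read() with no size reads to EOF
    assert (n, rest) == (11, 'world')
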
index 3d48652dd53afe4eecc3bc35628646e427a8ac73..38e794c24a217ffa5c76c1a7b026e4d432d369a0 100644 (file)
@@ -487,7 +487,8 @@ class RichCollectionBase(CollectionBase):
 
     def __init__(self, parent=None):
         self.parent = parent
-        self._modified = True
+        self._committed = False
+        self._callback = None
         self._items = {}
 
     def _my_api(self):
@@ -537,24 +538,24 @@ class RichCollectionBase(CollectionBase):
                 if item is None:
                     # create new file
                     if create_type == COLLECTION:
-                        item = Subcollection(self)
+                        item = Subcollection(self, pathcomponents[0])
                     else:
-                        item = ArvadosFile(self)
+                        item = ArvadosFile(self, pathcomponents[0])
                     self._items[pathcomponents[0]] = item
-                    self._modified = True
+                    self._committed = False
                     self.notify(ADD, self, pathcomponents[0], item)
                 return item
             else:
                 if item is None:
                     # create new collection
-                    item = Subcollection(self)
+                    item = Subcollection(self, pathcomponents[0])
                     self._items[pathcomponents[0]] = item
-                    self._modified = True
+                    self._committed = False
                     self.notify(ADD, self, pathcomponents[0], item)
                 if isinstance(item, RichCollectionBase):
                     return item.find_or_create(pathcomponents[1], create_type)
                 else:
-                    raise IOError((errno.ENOTDIR, "Interior path components must be subcollection"))
+                    raise IOError(errno.ENOTDIR, "Not a directory: '%s'" % pathcomponents[0])
         else:
             return self
 
@@ -567,7 +568,7 @@ class RichCollectionBase(CollectionBase):
 
         """
         if not path:
-            raise errors.ArgumentError("Parameter 'path' must not be empty.")
+            raise errors.ArgumentError("Parameter 'path' is empty.")
 
         pathcomponents = path.split("/", 1)
         item = self._items.get(pathcomponents[0])
@@ -580,15 +581,20 @@ class RichCollectionBase(CollectionBase):
                 else:
                     return item
             else:
-                raise IOError((errno.ENOTDIR, "Interior path components must be subcollection"))
+                raise IOError(errno.ENOTDIR, "Is not a directory: %s" % pathcomponents[0])
 
-    def mkdirs(path):
+    @synchronized
+    def mkdirs(self, path):
         """Recursive subcollection create.
 
-        Like `os.mkdirs()`.  Will create intermediate subcollections needed to
-        contain the leaf subcollection path.
+        Like `os.makedirs()`.  Will create intermediate subcollections needed
+        to contain the leaf subcollection path.
 
         """
+
+        if self.find(path) is not None:
+            raise IOError(errno.EEXIST, "Directory or file exists: '%s'" % path)
+
         return self.find_or_create(path, COLLECTION)
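
A short sketch of the mkdirs() semantics above (path names hypothetical):

    import errno
    import arvados.collection

    c = arvados.collection.Collection()
    c.mkdirs('a/b/c')            # creates intermediate subcollections
    try:
        c.mkdirs('a/b/c')        # an existing path now raises EEXIST
    except IOError as e:
        assert e.errno == errno.EEXIST
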
 
     def open(self, path, mode="r"):
@@ -615,7 +621,7 @@ class RichCollectionBase(CollectionBase):
         create = (mode != "r")
 
         if create and not self.writable():
-            raise IOError((errno.EROFS, "Collection is read only"))
+            raise IOError(errno.EROFS, "Collection is read only")
 
         if create:
             arvfile = self.find_or_create(path, FILE)
@@ -623,9 +629,9 @@ class RichCollectionBase(CollectionBase):
             arvfile = self.find(path)
 
         if arvfile is None:
-            raise IOError((errno.ENOENT, "File not found"))
+            raise IOError(errno.ENOENT, "File not found")
         if not isinstance(arvfile, ArvadosFile):
-            raise IOError((errno.EISDIR, "Path must refer to a file."))
+            raise IOError(errno.EISDIR, "Is a directory: %s" % path)
 
         if mode[0] == "w":
             arvfile.truncate(0)
@@ -633,26 +639,31 @@ class RichCollectionBase(CollectionBase):
         name = os.path.basename(path)
 
         if mode == "r":
-            return ArvadosFileReader(arvfile, name, mode, num_retries=self.num_retries)
+            return ArvadosFileReader(arvfile, num_retries=self.num_retries)
         else:
-            return ArvadosFileWriter(arvfile, name, mode, num_retries=self.num_retries)
+            return ArvadosFileWriter(arvfile, mode, num_retries=self.num_retries)
 
-    @synchronized
     def modified(self):
-        """Test if the collection (or any subcollection or file) has been modified."""
-        if self._modified:
-            return True
-        for k,v in self._items.items():
-            if v.modified():
-                return True
-        return False
+        """Determine if the collection has been modified since last commited."""
+        return not self.committed()
+
+    @synchronized
+    def committed(self):
+        """Determine if the collection has been committed to the API server."""
+
+        if self._committed is False:
+            return False
+        for v in self._items.values():
+            if v.committed() is False:
+                return False
+        return True
 
     @synchronized
-    def set_unmodified(self):
-        """Recursively clear modified flag."""
-        self._modified = False
+    def set_committed(self):
+        """Recursively set committed flag to True."""
+        self._committed = True
         for k,v in self._items.items():
-            v.set_unmodified()
+            v.set_committed()
 
     @synchronized
     def __iter__(self):
@@ -683,7 +694,7 @@ class RichCollectionBase(CollectionBase):
     def __delitem__(self, p):
         """Delete an item by name which is directly contained by this collection."""
         del self._items[p]
-        self._modified = True
+        self._committed = False
         self.notify(DEL, self, p, None)
 
     @synchronized
@@ -715,33 +726,33 @@ class RichCollectionBase(CollectionBase):
         """
 
         if not path:
-            raise errors.ArgumentError("Parameter 'path' must not be empty.")
+            raise errors.ArgumentError("Parameter 'path' is empty.")
 
         pathcomponents = path.split("/", 1)
         item = self._items.get(pathcomponents[0])
         if item is None:
-            raise IOError((errno.ENOENT, "File not found"))
+            raise IOError(errno.ENOENT, "File not found")
         if len(pathcomponents) == 1:
             if isinstance(self._items[pathcomponents[0]], RichCollectionBase) and len(self._items[pathcomponents[0]]) > 0 and not recursive:
-                raise IOError((errno.ENOTEMPTY, "Subcollection not empty"))
+                raise IOError(errno.ENOTEMPTY, "Subcollection not empty")
             deleteditem = self._items[pathcomponents[0]]
             del self._items[pathcomponents[0]]
-            self._modified = True
+            self._committed = False
             self.notify(DEL, self, pathcomponents[0], deleteditem)
         else:
             item.remove(pathcomponents[1])
 
     def _clonefrom(self, source):
         for k,v in source.items():
-            self._items[k] = v.clone(self)
+            self._items[k] = v.clone(self, k)
 
     def clone(self):
         raise NotImplementedError()
 
     @must_be_writable
     @synchronized
-    def add(self, source_obj, target_name, overwrite=False):
-        """Copy a file or subcollection to this collection.
+    def add(self, source_obj, target_name, overwrite=False, reparent=False):
+        """Copy or move a file or subcollection to this collection.
 
         :source_obj:
           An ArvadosFile, or Subcollection object
@@ -753,24 +764,74 @@ class RichCollectionBase(CollectionBase):
         :overwrite:
           Whether to overwrite target file if it already exists.
 
+        :reparent:
+          If True, source_obj will be moved from its parent collection to this collection.
+          If False, source_obj will be copied and the parent collection will be
+          unmodified.
+
         """
 
         if target_name in self and not overwrite:
-            raise IOError((errno.EEXIST, "File already exists"))
+            raise IOError(errno.EEXIST, "File already exists")
 
         modified_from = None
         if target_name in self:
             modified_from = self[target_name]
 
-        # Actually make the copy.
-        dup = source_obj.clone(self)
-        self._items[target_name] = dup
-        self._modified = True
+        # Actually make the move or copy.
+        if reparent:
+            source_obj._reparent(self, target_name)
+            item = source_obj
+        else:
+            item = source_obj.clone(self, target_name)
+
+        self._items[target_name] = item
+        self._committed = False
 
         if modified_from:
-            self.notify(MOD, self, target_name, (modified_from, dup))
+            self.notify(MOD, self, target_name, (modified_from, item))
         else:
-            self.notify(ADD, self, target_name, dup)
+            self.notify(ADD, self, target_name, item)
+
+    def _get_src_target(self, source, target_path, source_collection, create_dest):
+        if source_collection is None:
+            source_collection = self
+
+        # Find the object
+        if isinstance(source, basestring):
+            source_obj = source_collection.find(source)
+            if source_obj is None:
+                raise IOError(errno.ENOENT, "File not found")
+            sourcecomponents = source.split("/")
+        else:
+            source_obj = source
+            sourcecomponents = None
+
+        # Find the parent collection of the target path.
+        targetcomponents = target_path.split("/")
+
+        # Determine the name to use.
+        target_name = targetcomponents[-1] if targetcomponents[-1] else (sourcecomponents[-1] if sourcecomponents else None)
+
+        if not target_name:
+            raise errors.ArgumentError("Target path is empty and source is an object.  Cannot determine destination filename to use.")
+
+        if create_dest:
+            target_dir = self.find_or_create("/".join(targetcomponents[0:-1]), COLLECTION)
+        else:
+            if len(targetcomponents) > 1:
+                target_dir = self.find("/".join(targetcomponents[0:-1]))
+            else:
+                target_dir = self
+
+        if target_dir is None:
+            raise IOError(errno.ENOENT, "Target directory not found.")
+
+        if target_name in target_dir and isinstance(target_dir[target_name], RichCollectionBase) and sourcecomponents:
+            target_dir = target_dir[target_name]
+            target_name = sourcecomponents[-1]
+
+        return (source_obj, target_dir, target_name)
 
     @must_be_writable
     @synchronized
@@ -792,42 +853,79 @@ class RichCollectionBase(CollectionBase):
         :overwrite:
           Whether to overwrite target file if it already exists.
         """
-        if source_collection is None:
-            source_collection = self
 
-        # Find the object to copy
-        if isinstance(source, basestring):
-            source_obj = source_collection.find(source)
-            if source_obj is None:
-                raise IOError((errno.ENOENT, "File not found"))
-            sourcecomponents = source.split("/")
-        else:
-            source_obj = source
-            sourcecomponents = None
+        source_obj, target_dir, target_name = self._get_src_target(source, target_path, source_collection, True)
+        target_dir.add(source_obj, target_name, overwrite, False)
 
-        # Find parent collection the target path
-        targetcomponents = target_path.split("/")
+    @must_be_writable
+    @synchronized
+    def rename(self, source, target_path, source_collection=None, overwrite=False):
+        """Move a file or subcollection from `source_collection` to a new path in this collection.
 
-        # Determine the name to use.
-        target_name = targetcomponents[-1] if targetcomponents[-1] else (sourcecomponents[-1] if sourcecomponents else None)
+        :source:
+          A string with a path to source file or subcollection.
 
-        if not target_name:
-            raise errors.ArgumentError("Target path is empty and source is an object.  Cannot determine destination filename to use.")
+        :target_path:
+          Destination file or path.  If the target path already exists and is a
+          subcollection, the item will be placed inside the subcollection.  If
+          the target path already exists and is a file, this will raise an error
+          unless you specify `overwrite=True`.
 
-        target_dir = self.find_or_create("/".join(targetcomponents[0:-1]), COLLECTION)
+        :source_collection:
+          Collection to copy `source_path` from (default `self`)
 
-        if target_name in target_dir and isinstance(self[target_name], RichCollectionBase) and sourcecomponents:
-            target_dir = target_dir[target_name]
-            target_name = sourcecomponents[-1]
+        :overwrite:
+          Whether to overwrite target file if it already exists.
+        """
+
+        source_obj, target_dir, target_name = self._get_src_target(source, target_path, source_collection, False)
+        if not source_obj.writable():
+            raise IOError(errno.EROFS, "Source collection is read only.")
+        target_dir.add(source_obj, target_name, overwrite, True)
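
A sketch contrasting copy() with the new rename() (paths hypothetical):

    import arvados.collection

    c = arvados.collection.Collection()
    with c.open('dir1/file1.txt', 'w') as f:
        f.write('data')
    c.copy('dir1/file1.txt', 'dir2/')          # original stays in place
    c.rename('dir2/file1.txt', 'renamed.txt')  # moved via reparenting
    assert c.find('dir1/file1.txt') is not None
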
 
-        target_dir.add(source_obj, target_name, overwrite)
+    def portable_manifest_text(self, stream_name="."):
+        """Get the manifest text for this collection, sub collections and files.
+
+        This method does not flush outstanding blocks to Keep.  It will return
+        a normalized manifest with access tokens stripped.
+
+        :stream_name:
+          Name to use for this stream (directory)
+
+        """
+        return self._get_manifest_text(stream_name, True, True)
 
     @synchronized
     def manifest_text(self, stream_name=".", strip=False, normalize=False):
         """Get the manifest text for this collection, sub collections and files.
 
+        This method will flush outstanding blocks to Keep.  By default, it will
+        not normalize an unmodified manifest or strip access tokens.
+
+        :stream_name:
+          Name to use for this stream (directory)
+
+        :strip:
+          If True, remove signing tokens from block locators if present.
+          If False (default), block locators are left unchanged.
+
+        :normalize:
+          If True, always export the manifest text in normalized form
+          even if the Collection is not modified.  If False (default) and the collection
+          is not modified, return the original manifest text even if it is not
+          in normalized form.
+
+        """
+
+        self._my_block_manager().commit_all()
+        return self._get_manifest_text(stream_name, strip, normalize)
+
+    @synchronized
+    def _get_manifest_text(self, stream_name, strip, normalize):
+        """Get the manifest text for this collection, sub collections and files.
+
         :stream_name:
-          Name of the stream (directory)
+          Name to use for this stream (directory)
 
         :strip:
           If True, remove signing tokens from block locators if present.
@@ -841,7 +939,7 @@ class RichCollectionBase(CollectionBase):
 
         """
 
-        if self.modified() or self._manifest_text is None or normalize:
+        if not self.committed() or self._manifest_text is None or normalize:
             stream = {}
             buf = []
             sorted_keys = sorted(self.keys())
@@ -861,7 +959,7 @@ class RichCollectionBase(CollectionBase):
             if stream:
                 buf.append(" ".join(normalize_stream(stream_name, stream)) + "\n")
             for dirname in [s for s in sorted_keys if isinstance(self[s], RichCollectionBase)]:
-                buf.append(self[dirname].manifest_text(stream_name=os.path.join(stream_name, dirname), strip=strip))
+                buf.append(self[dirname].manifest_text(stream_name=os.path.join(stream_name, dirname), strip=strip, normalize=True))
             return "".join(buf)
         else:
             if strip:
@@ -881,15 +979,15 @@ class RichCollectionBase(CollectionBase):
             holding_collection = Collection(api_client=self._my_api(), keep_client=self._my_keep())
         for k in self:
             if k not in end_collection:
-               changes.append((DEL, os.path.join(prefix, k), self[k].clone(holding_collection)))
+               changes.append((DEL, os.path.join(prefix, k), self[k].clone(holding_collection, "")))
         for k in end_collection:
             if k in self:
                 if isinstance(end_collection[k], Subcollection) and isinstance(self[k], Subcollection):
                     changes.extend(self[k].diff(end_collection[k], os.path.join(prefix, k), holding_collection))
                 elif end_collection[k] != self[k]:
-                    changes.append((MOD, os.path.join(prefix, k), self[k].clone(holding_collection), end_collection[k].clone(holding_collection)))
+                    changes.append((MOD, os.path.join(prefix, k), self[k].clone(holding_collection, ""), end_collection[k].clone(holding_collection, "")))
             else:
-                changes.append((ADD, os.path.join(prefix, k), end_collection[k].clone(holding_collection)))
+                changes.append((ADD, os.path.join(prefix, k), end_collection[k].clone(holding_collection, "")))
         return changes
 
     @must_be_writable
@@ -901,12 +999,14 @@ class RichCollectionBase(CollectionBase):
         alternate path indicating the conflict.
 
         """
+        if changes:
+            self._committed = False
         for change in changes:
             event_type = change[0]
             path = change[1]
             initial = change[2]
             local = self.find(path)
-            conflictpath = "%s~conflict-%s~" % (path, time.strftime("%Y-%m-%d-%H:%M:%S",
+            conflictpath = "%s~%s~conflict~" % (path, time.strftime("%Y%m%d-%H%M%S",
                                                                     time.gmtime()))
             if event_type == ADD:
                 if local is None:
@@ -941,9 +1041,27 @@ class RichCollectionBase(CollectionBase):
 
     def portable_data_hash(self):
         """Get the portable data hash for this collection's manifest."""
-        stripped = self.manifest_text(strip=True)
+        stripped = self.portable_manifest_text()
         return hashlib.md5(stripped).hexdigest() + '+' + str(len(stripped))
 
+    @synchronized
+    def subscribe(self, callback):
+        if self._callback is None:
+            self._callback = callback
+        else:
+            raise errors.ArgumentError("A callback is already set on this collection.")
+
+    @synchronized
+    def unsubscribe(self):
+        if self._callback is not None:
+            self._callback = None
+
+    @synchronized
+    def notify(self, event, collection, name, item):
+        if self._callback:
+            self._callback(event, collection, name, item)
+        self.root_collection().notify(event, collection, name, item)
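
The callback plumbing above might be used like this (a sketch; ADD, DEL, MOD and WRITE are the event constants defined in this SDK):

    import arvados.collection

    events = []

    def log_change(event, collection, name, item):
        # event is one of ADD, DEL, MOD or WRITE
        events.append((event, name))

    c = arvados.collection.Collection()
    c.subscribe(log_change)
    with c.open('f.txt', 'w') as f:
        f.write('x')     # fires ADD on create and WRITE on write
    c.unsubscribe()
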
+
     @synchronized
     def __eq__(self, other):
         if other is self:
@@ -962,6 +1080,12 @@ class RichCollectionBase(CollectionBase):
     def __ne__(self, other):
         return not self.__eq__(other)
 
+    @synchronized
+    def flush(self):
+        """Flush bufferblocks to Keep."""
+        for e in self.values():
+            e.flush()
+
 
 class Collection(RichCollectionBase):
     """Represents the root of an Arvados Collection.
@@ -1044,9 +1168,9 @@ class Collection(RichCollectionBase):
         self._manifest_locator = None
         self._manifest_text = None
         self._api_response = None
+        self._past_versions = set()
 
         self.lock = threading.RLock()
-        self.callbacks = []
         self.events = None
 
         if manifest_locator_or_text:
@@ -1058,7 +1182,7 @@ class Collection(RichCollectionBase):
                 self._manifest_text = manifest_locator_or_text
             else:
                 raise errors.ArgumentError(
-                    "Argument to CollectionReader must be a manifest or a collection UUID")
+                    "Argument to CollectionReader is not a manifest or a collection UUID")
 
             try:
                 self._populate()
@@ -1074,6 +1198,10 @@ class Collection(RichCollectionBase):
     def writable(self):
         return True
 
+    @synchronized
+    def known_past_version(self, modified_at_and_portable_data_hash):
+        return modified_at_and_portable_data_hash in self._past_versions
+
     @synchronized
     @retry_method
     def update(self, other=None, num_retries=None):
@@ -1083,9 +1211,15 @@ class Collection(RichCollectionBase):
             if self._manifest_locator is None:
                 raise errors.ArgumentError("`other` is None but collection does not have a manifest_locator uuid")
             response = self._my_api().collections().get(uuid=self._manifest_locator).execute(num_retries=num_retries)
+            if self.known_past_version((response.get("modified_at"), response.get("portable_data_hash"))):
+                # We've already merged this record.  Don't do anything.
+                return
+            else:
+                self._past_versions.add((response.get("modified_at"), response.get("portable_data_hash")))
             other = CollectionReader(response["manifest_text"])
         baseline = CollectionReader(self._manifest_text)
         self.apply(baseline.diff(other))
+        self._manifest_text = self.manifest_text()
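
A sketch of update() with the new version de-duplication (the UUID is a placeholder):

    import arvados.collection

    c = arvados.collection.Collection('zzzzz-4zz18-xxxxxxxxxxxxxxx')
    # Re-fetches the record; a (modified_at, portable_data_hash) pair that
    # was already merged is recognized by known_past_version() and skipped.
    c.update()
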
 
     @synchronized
     def _my_api(self):
@@ -1109,6 +1243,10 @@ class Collection(RichCollectionBase):
             self._block_manager = _BlockManager(self._my_keep())
         return self._block_manager
 
+    def _remember_api_response(self, response):
+        self._api_response = response
+        self._past_versions.add((response.get("modified_at"), response.get("portable_data_hash")))
+
     def _populate_from_api_server(self):
         # As in KeepClient itself, we must wait until the last
         # possible moment to instantiate an API client, in order to
@@ -1118,9 +1256,9 @@ class Collection(RichCollectionBase):
         # clause, just like any other Collection lookup
         # failure. Return an exception, or None if successful.
         try:
-            self._api_response = self._my_api().collections().get(
+            self._remember_api_response(self._my_api().collections().get(
                 uuid=self._manifest_locator).execute(
-                    num_retries=self.num_retries)
+                    num_retries=self.num_retries))
             self._manifest_text = self._api_response['manifest_text']
             return None
         except Exception as e:
@@ -1179,9 +1317,12 @@ class Collection(RichCollectionBase):
 
     def __exit__(self, exc_type, exc_value, traceback):
         """Support scoped auto-commit in a with: block."""
-        if exc_type is not None:
+        if exc_type is None:
             if self.writable() and self._has_collection_uuid():
                 self.save()
+        self.stop_threads()
+
+    def stop_threads(self):
         if self._block_manager is not None:
             self._block_manager.stop_threads()
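
# Illustrative sketch (not part of this patch): with the corrected
# `exc_type is None` test, save() runs only on a clean exit, and
# stop_threads() shuts the block manager down either way.  The UUID is a
# placeholder, and a reachable API server is assumed.
import arvados.collection

with arvados.collection.Collection("zzzzz-4zz18-xxxxxxxxxxxxxxx") as c:
    f = c.open("data.txt", "w")
    f.write("payload")
    f.close()
# clean exit: c.save() committed the change; an exception would skip it
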
 
@@ -1200,7 +1341,7 @@ class Collection(RichCollectionBase):
         return self._manifest_locator
 
     @synchronized
-    def clone(self, new_parent=None, readonly=False, new_config=None):
+    def clone(self, new_parent=None, new_name=None, readonly=False, new_config=None):
         if new_config is None:
             new_config = self._config
         if readonly:
@@ -1249,8 +1390,8 @@ class Collection(RichCollectionBase):
         """Save collection to an existing collection record.
 
         Commit pending buffer blocks to Keep, merge with remote record (if
-        merge=True, the default), write the manifest to Keep, and update the
-        collection record.
+        merge=True, the default), and update the collection record.  Returns
+        the current manifest text.
 
         Will raise AssertionError if not associated with a collection record on
         the API server.  If you want to save a manifest to Keep only, see
@@ -1264,40 +1405,48 @@ class Collection(RichCollectionBase):
           Retry count on API calls (if None, use the collection default)
 
         """
-        if self.modified():
+        if not self.committed():
             if not self._has_collection_uuid():
-                raise AssertionError("Collection manifest_locator must be a collection uuid.  Use save_new() for new collections.")
+                raise AssertionError("Collection manifest_locator is not a collection uuid.  Use save_new() for new collections.")
+
             self._my_block_manager().commit_all()
+
             if merge:
                 self.update()
-            self._my_keep().put(self.manifest_text(strip=True), num_retries=num_retries)
 
             text = self.manifest_text(strip=False)
-            self._api_response = self._my_api().collections().update(
+            self._remember_api_response(self._my_api().collections().update(
                 uuid=self._manifest_locator,
                 body={'manifest_text': text}
                 ).execute(
-                    num_retries=num_retries)
+                    num_retries=num_retries))
             self._manifest_text = self._api_response["manifest_text"]
-            self.set_unmodified()
+            self.set_committed()
+
+        return self._manifest_text
 
 
     @must_be_writable
     @synchronized
     @retry_method
-    def save_new(self, name=None, create_collection_record=True, owner_uuid=None, ensure_unique_name=False, num_retries=None):
+    def save_new(self, name=None,
+                 create_collection_record=True,
+                 owner_uuid=None,
+                 ensure_unique_name=False,
+                 num_retries=None):
         """Save collection to a new collection record.
 
-        Commit pending buffer blocks to Keep, write the manifest to Keep, and
-        create a new collection record (if create_collection_record True).
-        After creating a new collection record, this Collection object will be
-        associated with the new record used by `save()`.
+        Commit pending buffer blocks to Keep and, when create_collection_record
+        is True (the default), create a new collection record.  After the
+        record is created, this Collection object is associated with it, and
+        subsequent `save()` calls update that record.  Returns the current
+        manifest text.
 
         :name:
           The collection name.
 
         :create_collection_record:
-          If True, create a collection record.  If False, only save the manifest to keep.
+          If True, create a collection record on the API server.
+          If False, only commit blocks to Keep and return the manifest text.
 
         :owner_uuid:
           The user or project uuid that will own this collection.
@@ -1313,38 +1462,27 @@ class Collection(RichCollectionBase):
 
         """
         self._my_block_manager().commit_all()
-        self._my_keep().put(self.manifest_text(strip=True), num_retries=num_retries)
         text = self.manifest_text(strip=False)
 
         if create_collection_record:
             if name is None:
-                name = "Collection created %s" % (time.strftime("%Y-%m-%d %H:%M:%S %Z", time.localtime()))
+                name = "New collection"
+                ensure_unique_name = True
 
             body = {"manifest_text": text,
                     "name": name}
             if owner_uuid:
                 body["owner_uuid"] = owner_uuid
 
-            self._api_response = self._my_api().collections().create(ensure_unique_name=ensure_unique_name, body=body).execute(num_retries=num_retries)
+            self._remember_api_response(self._my_api().collections().create(ensure_unique_name=ensure_unique_name, body=body).execute(num_retries=num_retries))
             text = self._api_response["manifest_text"]
 
             self._manifest_locator = self._api_response["uuid"]
 
-        self._manifest_text = text
-        self.set_unmodified()
-
-    @synchronized
-    def subscribe(self, callback):
-        self.callbacks.append(callback)
+            self._manifest_text = text
+            self.set_committed()
 
-    @synchronized
-    def unsubscribe(self, callback):
-        self.callbacks.remove(callback)
-
-    @synchronized
-    def notify(self, event, collection, name, item):
-        for c in self.callbacks:
-            c(event, collection, name, item)
+        return text
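
# Illustrative usage sketch (not part of this patch; assumes a configured
# API host and token): both save() and save_new() now return the current
# manifest text, and an unnamed save_new() defaults to "New collection"
# with ensure_unique_name=True.
import arvados.collection

c = arvados.collection.Collection()
f = c.open("out.txt", "w")
f.write("results")
f.close()
manifest = c.save_new(name="analysis output")  # creates the API record

f = c.open("out.txt", "w")
f.write("revised results")
f.close()
manifest = c.save()  # merge=True folds in remote edits, then updates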
 
     @synchronized
     def _import_manifest(self, manifest_text):
@@ -1375,13 +1513,14 @@ class Collection(RichCollectionBase):
                 segments = []
                 streamoffset = 0L
                 state = BLOCKS
+                self.find_or_create(stream_name, COLLECTION)
                 continue
 
             if state == BLOCKS:
                 block_locator = re.match(r'[0-9a-f]{32}\+(\d+)(\+\S+)*', tok)
                 if block_locator:
                     blocksize = long(block_locator.group(1))
-                    blocks.append(Range(tok, streamoffset, blocksize))
+                    blocks.append(Range(tok, streamoffset, blocksize, 0))
                     streamoffset += blocksize
                 else:
                     state = SEGMENTS
@@ -1400,27 +1539,34 @@ class Collection(RichCollectionBase):
                         raise errors.SyntaxError("File %s conflicts with stream of the same name." % (filepath,))
                 else:
                     # error!
-                    raise errors.SyntaxError("Invalid manifest format")
+                    raise errors.SyntaxError("Invalid manifest format, expected file segment but did not match format: '%s'" % tok)
 
             if sep == "\n":
                 stream_name = None
                 state = STREAM_NAME
 
-        self.set_unmodified()
+        self.set_committed()
+
+    @synchronized
+    def notify(self, event, collection, name, item):
+        if self._callback:
+            self._callback(event, collection, name, item)
 
 
 class Subcollection(RichCollectionBase):
     """This is a subdirectory within a collection that doesn't have its own API
     server record.
 
-    It falls under the umbrella of the root collection.
+    Subcollection locking falls under the umbrella lock of its root collection.
 
     """
 
-    def __init__(self, parent):
+    def __init__(self, parent, name):
         super(Subcollection, self).__init__(parent)
         self.lock = self.root_collection().lock
         self._manifest_text = None
+        self.name = name
+        self.num_retries = parent.num_retries
 
     def root_collection(self):
         return self.parent.root_collection()
@@ -1437,21 +1583,25 @@ class Subcollection(RichCollectionBase):
     def _my_block_manager(self):
         return self.root_collection()._my_block_manager()
 
-    def notify(self, event, collection, name, item):
-        return self.root_collection().notify(event, collection, name, item)
-
     def stream_name(self):
-        for k, v in self.parent.items():
-            if v is self:
-                return os.path.join(self.parent.stream_name(), k)
-        return '.'
+        return os.path.join(self.parent.stream_name(), self.name)
 
     @synchronized
-    def clone(self, new_parent):
-        c = Subcollection(new_parent)
+    def clone(self, new_parent, new_name):
+        c = Subcollection(new_parent, new_name)
         c._clonefrom(self)
         return c
 
+    @must_be_writable
+    @synchronized
+    def _reparent(self, newparent, newname):
+        self._committed = False
+        self.flush()
+        self.parent.remove(self.name, recursive=True)
+        self.parent = newparent
+        self.name = newname
+        self.lock = self.parent.root_collection().lock
+
 
 class CollectionReader(Collection):
     """A read-only collection object.
index 75f8ca97bd0025fc5d4147ee8925df68b995a159..8ee61f5bc123f260f566f373e878bef8ee8d5f26 100755 (executable)
@@ -32,6 +32,8 @@ import arvados.util
 import arvados.commands._util as arv_cmd
 import arvados.commands.keepdocker
 
+from arvados.api import OrderedJsonModel
+
 logger = logging.getLogger('arvados.arv-copy')
 
 # local_repo_dir records which git repositories from the Arvados source
@@ -46,6 +48,9 @@ local_repo_dir = {}
 # destination collection UUIDs.
 collections_copied = {}
 
+# Set of (repository, script_version) two-tuples of commits copied in git.
+scripts_copied = set()
+
 # The owner_uuid of the object being copied
 src_owner_uuid = None
 
@@ -139,6 +144,7 @@ def main():
     exit(0)
 
 def set_src_owner_uuid(resource, uuid, args):
+    global src_owner_uuid
     c = resource.get(uuid=uuid).execute(num_retries=args.retries)
     src_owner_uuid = c.get("owner_uuid")
 
@@ -176,11 +182,19 @@ def api_for_instance(instance_name):
         client = arvados.api('v1',
                              host=cfg['ARVADOS_API_HOST'],
                              token=cfg['ARVADOS_API_TOKEN'],
-                             insecure=api_is_insecure)
+                             insecure=api_is_insecure,
+                             model=OrderedJsonModel())
     else:
         abort('need ARVADOS_API_HOST and ARVADOS_API_TOKEN for {}'.format(instance_name))
     return client
 
+# Check if git is available
+def check_git_availability():
+    try:
+        arvados.util.run_command(['git', '--help'])
+    except Exception:
+        abort('git command is not available. Please ensure git is installed.')
+
 # copy_pipeline_instance(pi_uuid, src, dst, args)
 #
 #    Copies a pipeline instance identified by pi_uuid from src to dst.
@@ -205,6 +219,8 @@ def copy_pipeline_instance(pi_uuid, src, dst, args):
     pi = src.pipeline_instances().get(uuid=pi_uuid).execute(num_retries=args.retries)
 
     if args.recursive:
+        check_git_availability()
+
         if not args.dst_git_repo:
             abort('--dst-git-repo is required when copying a pipeline recursively.')
         # Copy the pipeline template and save the copied template.
@@ -258,6 +274,8 @@ def copy_pipeline_template(pt_uuid, src, dst, args):
     pt = src.pipeline_templates().get(uuid=pt_uuid).execute(num_retries=args.retries)
 
     if args.recursive:
+        check_git_availability()
+
         if not args.dst_git_repo:
             abort('--dst-git-repo is required when copying a pipeline recursively.')
         # Copy input collections, docker images and git repos.
@@ -311,12 +329,38 @@ def copy_collections(obj, src, dst, args):
         obj = arvados.util.portable_data_hash_pattern.sub(copy_collection_fn, obj)
         obj = arvados.util.collection_uuid_pattern.sub(copy_collection_fn, obj)
         return obj
-    elif type(obj) == dict:
-        return {v: copy_collections(obj[v], src, dst, args) for v in obj}
-    elif type(obj) == list:
-        return [copy_collections(v, src, dst, args) for v in obj]
+    elif isinstance(obj, dict):
+        return type(obj)((v, copy_collections(obj[v], src, dst, args))
+                         for v in obj)
+    elif isinstance(obj, list):
+        return type(obj)(copy_collections(v, src, dst, args) for v in obj)
     return obj
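
# Illustrative sketch (not part of this patch): rebuilding containers with
# type(obj)(...) preserves subclasses such as the OrderedDict instances
# produced by OrderedJsonModel, so copied components keep their ordering.
import collections

def identity_copy(obj):
    if isinstance(obj, dict):
        return type(obj)((k, identity_copy(obj[k])) for k in obj)
    elif isinstance(obj, list):
        return type(obj)(identity_copy(v) for v in obj)
    return obj

src = collections.OrderedDict([("b", 1), ("a", 2)])
assert type(identity_copy(src)) is collections.OrderedDict  # order kept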
 
+def migrate_jobspec(jobspec, src, dst, dst_repo, args):
+    """Copy a job's script to the destination repository, and update its record.
+
+    Given a jobspec dictionary, this function finds the referenced script from
+    src and copies it to dst and dst_repo.  It also updates jobspec in place to
+    refer to names on the destination.
+    """
+    repo = jobspec.get('repository')
+    if repo is None:
+        return
+    # script_version is the "script_version" parameter from the source
+    # component or job.  If no script_version was supplied in the
+    # component or job, it is a mistake in the pipeline, but for the
+    # purposes of copying the repository, default to "master".
+    script_version = jobspec.get('script_version') or 'master'
+    script_key = (repo, script_version)
+    if script_key not in scripts_copied:
+        copy_git_repo(repo, src, dst, dst_repo, script_version, args)
+        scripts_copied.add(script_key)
+    jobspec['repository'] = dst_repo
+    repo_dir = local_repo_dir[repo]
+    for version_key in ['script_version', 'supplied_script_version']:
+        if version_key in jobspec:
+            jobspec[version_key] = git_rev_parse(jobspec[version_key], repo_dir)
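
# Illustrative input/output for migrate_jobspec (not part of this patch;
# repository names and the resolved hash are made up): each (repository,
# script_version) pair is copied once, and version fields are pinned to
# full commit hashes by git_rev_parse().
jobspec = {"repository": "src_repo",
           "script_version": "master",
           "supplied_script_version": "master"}
# After migrate_jobspec(jobspec, src, dst, "dst_repo", args):
#   {"repository": "dst_repo",
#    "script_version": "<sha1 of src_repo master>",
#    "supplied_script_version": "<sha1 of src_repo master>"}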
+
 # copy_git_repos(p, src, dst, dst_repo, args)
 #
 #    Copies all git repositories referenced by pipeline instance or
@@ -335,33 +379,10 @@ def copy_collections(obj, src, dst, args):
 #    names.  The return value is undefined.
 #
 def copy_git_repos(p, src, dst, dst_repo, args):
-    copied = set()
-    for c in p['components']:
-        component = p['components'][c]
-        if 'repository' in component:
-            repo = component['repository']
-            script_version = component.get('script_version', None)
-            if repo not in copied:
-                copy_git_repo(repo, src, dst, dst_repo, script_version, args)
-                copied.add(repo)
-            component['repository'] = dst_repo
-            if script_version:
-                repo_dir = local_repo_dir[repo]
-                component['script_version'] = git_rev_parse(script_version, repo_dir)
+    for component in p['components'].itervalues():
+        migrate_jobspec(component, src, dst, dst_repo, args)
         if 'job' in component:
-            j = component['job']
-            if 'repository' in j:
-                repo = j['repository']
-                script_version = j.get('script_version', None)
-                if repo not in copied:
-                    copy_git_repo(repo, src, dst, dst_repo, script_version, args)
-                    copied.add(repo)
-                j['repository'] = dst_repo
-                repo_dir = local_repo_dir[repo]
-                if script_version:
-                    j['script_version'] = git_rev_parse(script_version, repo_dir)
-                if 'supplied_script_version' in j:
-                    j['supplied_script_version'] = git_rev_parse(j['supplied_script_version'], repo_dir)
+            migrate_jobspec(component['job'], src, dst, dst_repo, args)
 
 def total_collection_size(manifest_text):
     """Return the total number of bytes in this collection (excluding
@@ -404,12 +425,10 @@ def create_collection_from(c, src, dst, args):
     for link_class in ("docker_image_repo+tag", "docker_image_hash"):
         docker_links = src.links().list(filters=[["head_uuid", "=", collection_uuid], ["link_class", "=", link_class]]).execute(num_retries=args.retries)['items']
 
-        for d in docker_links:
-            body={
-                'head_uuid': dst_collection['uuid'],
-                'link_class': link_class,
-                'name': d['name'],
-            }
+        for src_link in docker_links:
+            body = {key: src_link[key]
+                    for key in ['link_class', 'name', 'properties']}
+            body['head_uuid'] = dst_collection['uuid']
             body['owner_uuid'] = args.project_uuid
 
             lk = dst.links().create(body=body).execute(num_retries=args.retries)
@@ -529,31 +548,30 @@ def copy_collection(obj_uuid, src, dst, args):
     else:
         progress_writer = None
 
-    for line in manifest.splitlines(True):
+    for line in manifest.splitlines():
         words = line.split()
-        dst_manifest_line = words[0]
+        dst_manifest += words[0]
         for word in words[1:]:
             try:
                 loc = arvados.KeepLocator(word)
-                blockhash = loc.md5sum
-                # copy this block if we haven't seen it before
-                # (otherwise, just reuse the existing dst_locator)
-                if blockhash not in dst_locators:
-                    logger.debug("Copying block %s (%s bytes)", blockhash, loc.size)
-                    if progress_writer:
-                        progress_writer.report(obj_uuid, bytes_written, bytes_expected)
-                    data = src_keep.get(word)
-                    dst_locator = dst_keep.put(data)
-                    dst_locators[blockhash] = dst_locator
-                    bytes_written += loc.size
-                dst_manifest_line += ' ' + dst_locators[blockhash]
             except ValueError:
                 # If 'word' can't be parsed as a locator,
                 # presume it's a filename.
-                dst_manifest_line += ' ' + word
-        dst_manifest += dst_manifest_line
-        if line.endswith("\n"):
-            dst_manifest += "\n"
+                dst_manifest += ' ' + word
+                continue
+            blockhash = loc.md5sum
+            # copy this block if we haven't seen it before
+            # (otherwise, just reuse the existing dst_locator)
+            if blockhash not in dst_locators:
+                logger.debug("Copying block %s (%s bytes)", blockhash, loc.size)
+                if progress_writer:
+                    progress_writer.report(obj_uuid, bytes_written, bytes_expected)
+                data = src_keep.get(word)
+                dst_locator = dst_keep.put(data)
+                dst_locators[blockhash] = dst_locator
+                bytes_written += loc.size
+            dst_manifest += ' ' + dst_locators[blockhash]
+        dst_manifest += "\n"
 
     if progress_writer:
         progress_writer.report(obj_uuid, bytes_written, bytes_expected)
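
# Standalone sketch (not part of this patch) of the rewrite above: each
# manifest token is either a Keep locator (copied once, then replaced with
# its destination locator) or a filename token (passed through).
# copy_block stands in for the src_keep.get()/dst_keep.put() pair.
import arvados

def rewrite_manifest(manifest, copy_block):
    dst_locators = {}
    dst_manifest = ""
    for line in manifest.splitlines():
        words = line.split()
        dst_manifest += words[0]            # stream name passes through
        for word in words[1:]:
            try:
                loc = arvados.KeepLocator(word)
            except ValueError:
                dst_manifest += ' ' + word  # filename token
                continue
            if loc.md5sum not in dst_locators:
                dst_locators[loc.md5sum] = copy_block(word)
            dst_manifest += ' ' + dst_locators[loc.md5sum]
        dst_manifest += "\n"
    return dst_manifest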
@@ -562,7 +580,6 @@ def copy_collection(obj_uuid, src, dst, args):
     # Copy the manifest and save the collection.
     logger.debug('saving %s with manifest: <%s>', obj_uuid, dst_manifest)
 
-    dst_keep.put(dst_manifest.encode('utf-8'))
     c['manifest_text'] = dst_manifest
     return create_collection_from(c, src, dst, args)
 
@@ -576,8 +593,7 @@ def copy_collection(obj_uuid, src, dst, args):
 #    All commits will be copied to a destination branch named for the
 #    source repository URL.
 #
-#    Because users cannot create their own repositories, the
-#    destination repository must already exist.
+#    The destination repository must already exist.
 #
 #    The user running this command must be authenticated
 #    to both repositories.
@@ -600,34 +616,23 @@ def copy_git_repo(src_git_repo, src, dst, dst_git_repo, script_version, args):
     dst_git_push_url  = r['items'][0]['push_url']
     logger.debug('dst_git_push_url: {}'.format(dst_git_push_url))
 
-    # script_version is the "script_version" parameter from the source
-    # component or job.  It is used here to tie the destination branch
-    # to the commit that was used on the source.  If no script_version
-    # was supplied in the component or job, it is a mistake in the pipeline,
-    # but for the purposes of copying the repository, default to "master".
-    #
-    if not script_version:
-        script_version = "master"
-
     dst_branch = re.sub(r'\W+', '_', "{}_{}".format(src_git_url, script_version))
 
-    # Copy git commits from src repo to dst repo (but only if
-    # we have not already copied this repo in this session).
-    #
-    if src_git_repo in local_repo_dir:
-        logger.debug('already copied src repo %s, skipping', src_git_repo)
-    else:
-        tmprepo = tempfile.mkdtemp()
-        local_repo_dir[src_git_repo] = tmprepo
+    # Copy git commits from src repo to dst repo.
+    if src_git_repo not in local_repo_dir:
+        local_repo_dir[src_git_repo] = tempfile.mkdtemp()
         arvados.util.run_command(
-            ["git", "clone", "--bare", src_git_url, tmprepo],
-            cwd=os.path.dirname(tmprepo))
+            ["git", "clone", "--bare", src_git_url,
+             local_repo_dir[src_git_repo]],
+            cwd=os.path.dirname(local_repo_dir[src_git_repo]))
         arvados.util.run_command(
-            ["git", "branch", dst_branch, script_version],
-            cwd=tmprepo)
-        arvados.util.run_command(["git", "remote", "add", "dst", dst_git_push_url], cwd=tmprepo)
-        arvados.util.run_command(["git", "push", "dst", dst_branch], cwd=tmprepo)
-
+            ["git", "remote", "add", "dst", dst_git_push_url],
+            cwd=local_repo_dir[src_git_repo])
+    arvados.util.run_command(
+        ["git", "branch", dst_branch, script_version],
+        cwd=local_repo_dir[src_git_repo])
+    arvados.util.run_command(["git", "push", "dst", dst_branch],
+                             cwd=local_repo_dir[src_git_repo])
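
# Worked example (not part of this patch; URL and commit are invented) of
# the destination branch naming above:
import re
src_git_url = "git@git.example.org:src_repo.git"
script_version = "1a2b3c4"
dst_branch = re.sub(r'\W+', '_', "{}_{}".format(src_git_url, script_version))
# dst_branch == "git_git_example_org_src_repo_git_1a2b3c4"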
 
 def copy_docker_images(pipeline, src, dst, args):
     """Copy any docker images named in the pipeline components'
index 933fd77dd7cf3c71b64e5e30dc387e19513a62bd..e48a6d15472cc2c90cd4af7e35251b653aa87cce 100644 (file)
@@ -1,6 +1,7 @@
 #!/usr/bin/env python
 
 import argparse
+import collections
 import datetime
 import errno
 import json
@@ -11,17 +12,20 @@ import tarfile
 import tempfile
 import _strptime
 
-from collections import namedtuple
+from operator import itemgetter
 from stat import *
 
 import arvados
+import arvados.util
 import arvados.commands._util as arv_cmd
 import arvados.commands.put as arv_put
+import ciso8601
 
+EARLIEST_DATETIME = datetime.datetime(datetime.MINYEAR, 1, 1, 0, 0, 0)
 STAT_CACHE_ERRORS = (IOError, OSError, ValueError)
 
-DockerImage = namedtuple('DockerImage',
-                         ['repo', 'tag', 'hash', 'created', 'vsize'])
+DockerImage = collections.namedtuple(
+    'DockerImage', ['repo', 'tag', 'hash', 'created', 'vsize'])
 
 keepdocker_parser = argparse.ArgumentParser(add_help=False)
 keepdocker_parser.add_argument(
@@ -159,11 +163,41 @@ def make_link(api_client, num_retries, link_class, link_name, **link_attrs):
     return api_client.links().create(body=link_attrs).execute(
         num_retries=num_retries)
 
-def ptimestamp(t):
-    s = t.split(".")
-    if len(s) == 2:
-        t = s[0] + s[1][-1:]
-    return datetime.datetime.strptime(t, "%Y-%m-%dT%H:%M:%SZ")
+def docker_link_sort_key(link):
+    """Build a sort key to find the latest available Docker image.
+
+    To find one source collection for a Docker image referenced by
+    name or image id, the API server looks for a link with the most
+    recent `image_timestamp` property; then the most recent
+    `created_at` timestamp.  This method generates a sort key for
+    Docker metadata links to sort them from least to most preferred.
+    """
+    try:
+        image_timestamp = ciso8601.parse_datetime_unaware(
+            link['properties']['image_timestamp'])
+    except (KeyError, ValueError):
+        image_timestamp = EARLIEST_DATETIME
+    return (image_timestamp,
+            ciso8601.parse_datetime_unaware(link['created_at']))
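
# Illustrative sketch (not part of this patch; the link dicts are
# fabricated): a parseable image_timestamp outranks any created_at, and
# created_at breaks ties among links without one.
from arvados.commands.keepdocker import docker_link_sort_key

links = [
    {'properties': {}, 'created_at': '2015-06-01T00:00:00Z'},
    {'properties': {'image_timestamp': '2015-01-01T00:00:00Z'},
     'created_at': '2015-05-01T00:00:00Z'},
]
links.sort(key=docker_link_sort_key, reverse=True)
# links[0] is the one with a real image_timestamp, despite its older
# created_at.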
+
+def _get_docker_links(api_client, num_retries, **kwargs):
+    links = arvados.util.list_all(api_client.links().list,
+                                  num_retries, **kwargs)
+    for link in links:
+        link['_sort_key'] = docker_link_sort_key(link)
+    links.sort(key=itemgetter('_sort_key'), reverse=True)
+    return links
+
+def _new_image_listing(link, dockerhash, repo='<none>', tag='<none>'):
+    timestamp_index = 1 if (link['_sort_key'][0] is EARLIEST_DATETIME) else 0
+    return {
+        '_sort_key': link['_sort_key'],
+        'timestamp': link['_sort_key'][timestamp_index],
+        'collection': link['head_uuid'],
+        'dockerhash': dockerhash,
+        'repo': repo,
+        'tag': tag,
+        }
 
 def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None):
     """List all Docker images known to the api_client with image_name and
@@ -177,39 +211,77 @@ def list_images_in_arv(api_client, num_retries, image_name=None, image_tag=None)
     a dict with fields "dockerhash", "repo", "tag", and "timestamp".
 
     """
-    docker_image_filters = [['link_class', 'in', ['docker_image_hash', 'docker_image_repo+tag']]]
+    search_filters = []
+    repo_links = None
+    hash_links = None
     if image_name:
-        image_link_name = "{}:{}".format(image_name, image_tag or 'latest')
-        docker_image_filters.append(['name', '=', image_link_name])
-
-    existing_links = api_client.links().list(
-        filters=docker_image_filters
-        ).execute(num_retries=num_retries)['items']
-    images = {}
-    for link in existing_links:
-        collection_uuid = link["head_uuid"]
-        if collection_uuid not in images:
-            images[collection_uuid]= {"dockerhash": "<none>",
-                      "repo":"<none>",
-                      "tag":"<none>",
-                      "timestamp": ptimestamp("1970-01-01T00:00:01Z")}
-
-        if link["link_class"] == "docker_image_hash":
-            images[collection_uuid]["dockerhash"] = link["name"]
-
-        if link["link_class"] == "docker_image_repo+tag":
-            r = link["name"].split(":")
-            images[collection_uuid]["repo"] = r[0]
-            if len(r) > 1:
-                images[collection_uuid]["tag"] = r[1]
-
-        if "image_timestamp" in link["properties"]:
-            images[collection_uuid]["timestamp"] = ptimestamp(link["properties"]["image_timestamp"])
+        # Find images with the name the user specified.
+        search_links = _get_docker_links(
+            api_client, num_retries,
+            filters=[['link_class', '=', 'docker_image_repo+tag'],
+                     ['name', '=',
+                      '{}:{}'.format(image_name, image_tag or 'latest')]])
+        if search_links:
+            repo_links = search_links
         else:
-            images[collection_uuid]["timestamp"] = ptimestamp(link["created_at"])
-
-    return sorted(images.items(), lambda a, b: cmp(b[1]["timestamp"], a[1]["timestamp"]))
-
+            # Fall back to finding images with the specified image hash.
+            search_links = _get_docker_links(
+                api_client, num_retries,
+                filters=[['link_class', '=', 'docker_image_hash'],
+                         ['name', 'ilike', image_name + '%']])
+            hash_links = search_links
+        # Only list information about images that were found in the search.
+        search_filters.append(['head_uuid', 'in',
+                               [link['head_uuid'] for link in search_links]])
+
+    # It should be reasonable to expect that each collection only has one
+    # image hash (though there may be many links specifying this).  Find
+    # the API server's most preferred image hash link for each collection.
+    if hash_links is None:
+        hash_links = _get_docker_links(
+            api_client, num_retries,
+            filters=search_filters + [['link_class', '=', 'docker_image_hash']])
+    hash_link_map = {link['head_uuid']: link for link in reversed(hash_links)}
+
+    # Each collection may have more than one name (though again, one name
+    # may be specified more than once).  Build an image listing from name
+    # tags, sorted by API server preference.
+    if repo_links is None:
+        repo_links = _get_docker_links(
+            api_client, num_retries,
+            filters=search_filters + [['link_class', '=',
+                                       'docker_image_repo+tag']])
+    seen_image_names = collections.defaultdict(set)
+    images = []
+    for link in repo_links:
+        collection_uuid = link['head_uuid']
+        if link['name'] in seen_image_names[collection_uuid]:
+            continue
+        seen_image_names[collection_uuid].add(link['name'])
+        try:
+            dockerhash = hash_link_map[collection_uuid]['name']
+        except KeyError:
+            dockerhash = '<unknown>'
+        name_parts = link['name'].split(':', 1)
+        images.append(_new_image_listing(link, dockerhash, *name_parts))
+
+    # Find any image hash links that did not have a corresponding name link,
+    # and add image listings for them, retaining the API server preference
+    # sorting.
+    images_start_size = len(images)
+    for collection_uuid, link in hash_link_map.iteritems():
+        if not seen_image_names[collection_uuid]:
+            images.append(_new_image_listing(link, link['name']))
+    if len(images) > images_start_size:
+        images.sort(key=itemgetter('_sort_key'), reverse=True)
+
+    # Remove any image listings that refer to unknown collections.
+    existing_coll_uuids = {coll['uuid'] for coll in arvados.util.list_all(
+            api_client.collections().list, num_retries,
+            filters=[['uuid', 'in', [im['collection'] for im in images]]],
+            select=['uuid'])}
+    return [(image['collection'], image) for image in images
+            if image['collection'] in existing_coll_uuids]
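
# Hedged usage sketch (not part of this patch; the output format is
# illustrative) of the reworked listing:
import arvados
from arvados.commands.keepdocker import list_images_in_arv

api = arvados.api('v1')
for coll_uuid, image in list_images_in_arv(api, 3, image_name='arvados/jobs'):
    print "%-12.12s %s:%s %s" % (image['dockerhash'], image['repo'],
                                 image['tag'], coll_uuid)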
 
 def main(arguments=None):
     args = arg_parser.parse_args(arguments)
index d8ed90bda007525630a394b259b9528e845b62cc..7ca6e7ca234f9b7ef2f1bc4dfc6ef84910c9c1e0 100644 (file)
@@ -5,6 +5,7 @@
 
 import argparse
 import arvados
+import arvados.collection
 import base64
 import datetime
 import errno
@@ -166,7 +167,9 @@ def parse_arguments(arguments):
     args = arg_parser.parse_args(arguments)
 
     if len(args.paths) == 0:
-        args.paths += ['/dev/stdin']
+        args.paths = ['-']
+
+    args.paths = map(lambda x: "-" if x == "/dev/stdin" else x, args.paths)
 
     if len(args.paths) != 1 or os.path.isdir(args.paths[0]):
         if args.filename:
@@ -181,9 +184,9 @@ def parse_arguments(arguments):
         args.progress = True
 
     if args.paths == ['-']:
-        args.paths = ['/dev/stdin']
+        args.resume = False
         if not args.filename:
-            args.filename = '-'
+            args.filename = 'stdin'
 
     return args
 
@@ -465,7 +468,16 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
     writer.report_progress()
     writer.do_queued_work()  # Do work resumed from cache.
     for path in args.paths:  # Copy file data to Keep.
-        if os.path.isdir(path):
+        if path == '-':
+            writer.start_new_stream()
+            writer.start_new_file(args.filename)
+            r = sys.stdin.read(64*1024)
+            while r:
+                # Bypass the _queued_file check in ResumableCollectionWriter.write()
+                # so the data goes straight to CollectionWriter.write().
+                super(arvados.collection.ResumableCollectionWriter, writer).write(r)
+                r = sys.stdin.read(64*1024)
+        elif os.path.isdir(path):
             writer.write_directory_tree(
                 path, max_manifest_depth=args.max_manifest_depth)
         else:
@@ -479,14 +491,14 @@ def main(arguments=None, stdout=sys.stdout, stderr=sys.stderr):
     if args.stream:
         output = writer.manifest_text()
         if args.normalize:
-            output = CollectionReader(output).manifest_text(normalize=True)
+            output = arvados.collection.CollectionReader(output).manifest_text(normalize=True)
     elif args.raw:
         output = ','.join(writer.data_locators())
     else:
         try:
             manifest_text = writer.manifest_text()
             if args.normalize:
-                manifest_text = CollectionReader(manifest_text).manifest_text(normalize=True)
+                manifest_text = arvados.collection.CollectionReader(manifest_text).manifest_text(normalize=True)
             replication_attr = 'replication_desired'
             if api_client._schema.schemas['Collection']['properties'].get(replication_attr, None) is None:
                 # API called it 'redundancy' before #3410.
index f2bf0f353bbd146ece4775da7b6094d654ed0d04..be94e7304a34f3a2cdc829cd9a9f4b8339aaaf33 100644 (file)
@@ -14,11 +14,12 @@ import logging
 import arvados.commands._util as arv_cmd
 
 logger = logging.getLogger('arvados.arv-run')
+logger.setLevel(logging.INFO)
 
 arvrun_parser = argparse.ArgumentParser(parents=[arv_cmd.retry_opt])
 arvrun_parser.add_argument('--dry-run', action="store_true", help="Print out the pipeline that would be submitted and exit")
 arvrun_parser.add_argument('--local', action="store_true", help="Run locally using arv-run-pipeline-instance")
-arvrun_parser.add_argument('--docker-image', type=str, default="arvados/jobs", help="Docker image to use, default arvados/jobs")
+arvrun_parser.add_argument('--docker-image', type=str, help="Docker image to use, otherwise use instance default.")
 arvrun_parser.add_argument('--ignore-rcode', action="store_true", help="Commands that return non-zero return codes should not be considered failed.")
 arvrun_parser.add_argument('--no-reuse', action="store_true", help="Do not reuse past jobs.")
 arvrun_parser.add_argument('--no-wait', action="store_true", help="Do not wait and display logs after submitting command, just exit.")
@@ -100,6 +101,63 @@ def statfile(prefix, fn):
 
     return prefix+fn
 
+def uploadfiles(files, api, dry_run=False, num_retries=0, project=None):
+    # Find the smallest path prefix that includes all the files that need to be uploaded.
+    # This starts at the root and iteratively removes common parent directory prefixes
+    # until all file paths no longer have a common parent.
+    n = True
+    pathprefix = "/"
+    while n:
+        pathstep = None
+        for c in files:
+            if pathstep is None:
+                sp = c.fn.split('/')
+                if len(sp) < 2:
+                    # no parent directories left
+                    n = False
+                    break
+                # path step takes next directory
+                pathstep = sp[0] + "/"
+            else:
+                # check if pathstep is common prefix for all files
+                if not c.fn.startswith(pathstep):
+                    n = False
+                    break
+        if n:
+            # pathstep is common parent directory for all files, so remove the prefix
+            # from each path
+            pathprefix += pathstep
+            for c in files:
+                c.fn = c.fn[len(pathstep):]
+
+    orgdir = os.getcwd()
+    os.chdir(pathprefix)
+
+    logger.info("Upload local files: \"%s\"", '" "'.join([c.fn for c in files]))
+
+    if dry_run:
+        logger.info("$(input) is %s", pathprefix.rstrip('/'))
+        pdh = "$(input)"
+    else:
+        files = sorted(files, key=lambda x: x.fn)
+        collection = arvados.CollectionWriter(api, num_retries=num_retries)
+        stream = None
+        for f in files:
+            sp = os.path.split(f.fn)
+            if sp[0] != stream:
+                stream = sp[0]
+                collection.start_new_stream(stream)
+            collection.write_file(f.fn, sp[1])
+        item = api.collections().create(body={"owner_uuid": project, "manifest_text": collection.manifest_text()}).execute()
+        pdh = item["portable_data_hash"]
+        logger.info("Uploaded to %s", item["uuid"])
+
+    for c in files:
+        c.fn = "$(file %s/%s)" % (pdh, c.fn)
+
+    os.chdir(orgdir)
+
+
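# Worked trace of the prefix loop above, with invented paths.  Starting
# from pathprefix = "/" and files:
#     home/user/data/a.txt
#     home/user/data/sub/b.txt
# iteration 1 strips "home/" -> pathprefix "/home/"
# iteration 2 strips "user/" -> pathprefix "/home/user/"
# iteration 3 strips "data/" -> pathprefix "/home/user/data/"
# iteration 4 finds "a.txt" has no parent directory left and stops; the
# upload then runs from "/home/user/data" with the shortened names.
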
 def main(arguments=None):
     args = arvrun_parser.parse_args(arguments)
 
@@ -178,62 +236,9 @@ def main(arguments=None):
                             command[i] = statfile(m.group(1), m.group(2))
                             break
 
-    n = True
-    pathprefix = "/"
     files = [c for command in slots[1:] for c in command if isinstance(c, UploadFile)]
-    if len(files) > 0:
-        # Find the smallest path prefix that includes all the files that need to be uploaded.
-        # This starts at the root and iteratively removes common parent directory prefixes
-        # until all file pathes no longer have a common parent.
-        while n:
-            pathstep = None
-            for c in files:
-                if pathstep is None:
-                    sp = c.fn.split('/')
-                    if len(sp) < 2:
-                        # no parent directories left
-                        n = False
-                        break
-                    # path step takes next directory
-                    pathstep = sp[0] + "/"
-                else:
-                    # check if pathstep is common prefix for all files
-                    if not c.fn.startswith(pathstep):
-                        n = False
-                        break
-            if n:
-                # pathstep is common parent directory for all files, so remove the prefix
-                # from each path
-                pathprefix += pathstep
-                for c in files:
-                    c.fn = c.fn[len(pathstep):]
-
-        orgdir = os.getcwd()
-        os.chdir(pathprefix)
-
-        print("Upload local files: \"%s\"" % '" "'.join([c.fn for c in files]))
-
-        if args.dry_run:
-            print("$(input) is %s" % pathprefix.rstrip('/'))
-            pdh = "$(input)"
-        else:
-            files = sorted(files, key=lambda x: x.fn)
-            collection = arvados.CollectionWriter(api, num_retries=args.retries)
-            stream = None
-            for f in files:
-                sp = os.path.split(f.fn)
-                if sp[0] != stream:
-                    stream = sp[0]
-                    collection.start_new_stream(stream)
-                collection.write_file(f.fn, sp[1])
-            item = api.collections().create(body={"owner_uuid": project, "manifest_text": collection.manifest_text()}).execute()
-            pdh = item["portable_data_hash"]
-            print "Uploaded to %s" % item["uuid"]
-
-        for c in files:
-            c.fn = "$(file %s/%s)" % (pdh, c.fn)
-
-        os.chdir(orgdir)
+    if files:
+        uploadfiles(files, api, dry_run=args.dry_run, num_retries=args.retries, project=project)
 
     for i in xrange(1, len(slots)):
         slots[i] = [("%s%s" % (c.prefix, c.fn)) if isinstance(c, ArvFile) else c for c in slots[i]]
@@ -244,11 +249,12 @@ def main(arguments=None):
         "repository": args.repository,
         "script_parameters": {
         },
-        "runtime_constraints": {
-            "docker_image": args.docker_image
-        }
+        "runtime_constraints": {}
     }
 
+    if args.docker_image:
+        component["runtime_constraints"]["docker_image"] = args.docker_image
+
     task_foreach = []
     group_parser = argparse.ArgumentParser()
     group_parser.add_argument('-b', '--batch-size', type=int)
@@ -298,7 +304,7 @@ def main(arguments=None):
     else:
         pipeline["owner_uuid"] = project
         pi = api.pipeline_instances().create(body=pipeline, ensure_unique_name=True).execute()
-        print "Running pipeline %s" % pi["uuid"]
+        logger.info("Running pipeline %s", pi["uuid"])
 
         if args.local:
             subprocess.call(["arv-run-pipeline-instance", "--instance", pi["uuid"], "--run-jobs-here"] + (["--no-reuse"] if args.no_reuse else []))
@@ -306,11 +312,11 @@ def main(arguments=None):
             ws.main(["--pipeline", pi["uuid"]])
 
         pi = api.pipeline_instances().get(uuid=pi["uuid"]).execute()
-        print "Pipeline is %s" % pi["state"]
+        logger.info("Pipeline is %s", pi["state"])
         if "output_uuid" in pi["components"]["command"]:
-            print "Output is %s" % pi["components"]["command"]["output_uuid"]
+            logger.info("Output is %s", pi["components"]["command"]["output_uuid"])
         else:
-            print "No output"
+            logger.info("No output")
 
 if __name__ == '__main__':
     main()
index 9bce9971076372394fdf2e5c1f9523eda9d115ec..347075dffdbb8cca1144f277b46086929ba86ab7 100644 (file)
@@ -14,6 +14,7 @@ def main(arguments=None):
     parser = argparse.ArgumentParser()
     parser.add_argument('-u', '--uuid', type=str, default="", help="Filter events on object_uuid")
     parser.add_argument('-f', '--filters', type=str, default="", help="Arvados query filter to apply to log events (JSON encoded)")
+    parser.add_argument('-s', '--start-time', type=str, default="", help="Fetch log events created at or after this time (UTC server time). Allowed formats: YYYY-MM-DD or YYYY-MM-DD hh:mm:ss")
 
     group = parser.add_mutually_exclusive_group()
     group.add_argument('--poll-interval', default=15, type=int, help="If websockets is not available, specify the polling interval, default is every 15 seconds")
@@ -60,6 +61,12 @@ def main(arguments=None):
     if args.pipeline:
         filters += [ ['object_uuid', '=', args.pipeline] ]
 
+    if args.start_time:
+        last_log_id = 1
+        filters += [ ['created_at', '>=', args.start_time] ]
+    else:
+        last_log_id = None
+
     def on_message(ev):
         global filters
         global ws
@@ -85,7 +92,7 @@ def main(arguments=None):
             print json.dumps(ev)
 
     try:
-        ws = subscribe(arvados.api('v1'), filters, on_message, poll_fallback=args.poll_interval)
+        ws = subscribe(arvados.api('v1'), filters, on_message, poll_fallback=args.poll_interval, last_log_id=last_log_id)
         if ws:
             if args.pipeline:
                 c = api.pipeline_instances().get(uuid=args.pipeline).execute()
index 3629520a4d5f7216a86a5b085585e4f0756b3581..bfd471ba52bee712a1e1768c91327ce28a9c6603 100644 (file)
@@ -1,7 +1,6 @@
 # errors.py - Arvados-specific exceptions.
 
 import json
-import requests
 
 from apiclient import errors as apiclient_errors
 from collections import OrderedDict
@@ -46,7 +45,7 @@ class KeepRequestError(Exception):
         self.message = message
 
     def _format_error(self, key, error):
-        if isinstance(error, requests.Response):
+        if isinstance(error, HttpError):
             err_fmt = "{} {} responded with {e.status_code} {e.reason}"
         else:
             err_fmt = "{} {} raised {e.__class__.__name__} ({e})"
@@ -61,6 +60,12 @@ class KeepRequestError(Exception):
         return self._request_errors
 
 
+class HttpError(Exception):
+    def __init__(self, status_code, reason):
+        self.status_code = status_code
+        self.reason = reason
+
+
 class ArgumentError(Exception):
     pass
 class SyntaxError(Exception):
index 09f2a871a966522201f5808e09fb37a820b1baf5..df5b3e7dee514b8aea45edaa73bcd47042d8fab3 100644 (file)
@@ -14,12 +14,8 @@ from ws4py.client.threadedclient import WebSocketClient
 _logger = logging.getLogger('arvados.events')
 
 class EventClient(WebSocketClient):
-    def __init__(self, url, filters, on_event):
-        # Prefer system's CA certificates (if available)
-        ssl_options = {}
-        certs_path = '/etc/ssl/certs/ca-certificates.crt'
-        if os.path.exists(certs_path):
-            ssl_options['ca_certs'] = certs_path
+    def __init__(self, url, filters, on_event, last_log_id):
+        ssl_options = {'ca_certs': arvados.util.ca_certs_path()}
         if config.flag_is_true('ARVADOS_API_HOST_INSECURE'):
             ssl_options['cert_reqs'] = ssl.CERT_NONE
         else:
@@ -32,19 +28,33 @@ class EventClient(WebSocketClient):
         super(EventClient, self).__init__(url, ssl_options=ssl_options)
         self.filters = filters
         self.on_event = on_event
+        self.stop = threading.Event()
+        self.last_log_id = last_log_id
 
     def opened(self):
-        self.subscribe(self.filters)
+        self.subscribe(self.filters, self.last_log_id)
 
     def received_message(self, m):
         self.on_event(json.loads(str(m)))
 
-    def close_connection(self):
-        try:
-            self.sock.shutdown(socket.SHUT_RDWR)
-            self.sock.close()
-        except:
-            pass
+    def closed(self, code, reason=None):
+        self.stop.set()
+
+    def close(self, code=1000, reason=''):
+        """Close event client and wait for it to finish."""
+
+        # parent close() method sends an asynchronous "closed" event to the server
+        super(EventClient, self).close(code, reason)
+
+        # if server doesn't respond by finishing the close handshake, we'll be
+        # stuck in limbo forever.  We don't need to wait for the server to
+        # respond to go ahead and actually close the socket.
+        self.close_connection()
+
+        # wait for the websocket thread to finish up (closed() is called by
+        # the websocket thread as part of terminate())
+        while not self.stop.is_set():
+            self.stop.wait(1)
 
     def subscribe(self, filters, last_log_id=None):
         m = {"method": "subscribe", "filters": filters}
@@ -56,7 +66,7 @@ class EventClient(WebSocketClient):
         self.send(json.dumps({"method": "unsubscribe", "filters": filters}))
 
 class PollClient(threading.Thread):
-    def __init__(self, api, filters, on_event, poll_time):
+    def __init__(self, api, filters, on_event, poll_time, last_log_id):
         super(PollClient, self).__init__()
         self.api = api
         if filters:
@@ -67,27 +77,35 @@ class PollClient(threading.Thread):
         self.poll_time = poll_time
         self.daemon = True
         self.stop = threading.Event()
+        self.last_log_id = last_log_id
 
     def run(self):
         self.id = 0
-        for f in self.filters:
-            items = self.api.logs().list(limit=1, order="id desc", filters=f).execute()['items']
-            if items:
-                if items[0]['id'] > self.id:
-                    self.id = items[0]['id']
+        if self.last_log_id is not None:
+            self.id = self.last_log_id
+        else:
+            for f in self.filters:
+                items = self.api.logs().list(limit=1, order="id desc", filters=f).execute()['items']
+                if items:
+                    if items[0]['id'] > self.id:
+                        self.id = items[0]['id']
 
         self.on_event({'status': 200})
 
         while not self.stop.isSet():
             max_id = self.id
+            moreitems = False
             for f in self.filters:
-                items = self.api.logs().list(order="id asc", filters=f+[["id", ">", str(self.id)]]).execute()['items']
-                for i in items:
+                items = self.api.logs().list(order="id asc", filters=f+[["id", ">", str(self.id)]]).execute()
+                for i in items["items"]:
                     if i['id'] > max_id:
                         max_id = i['id']
                     self.on_event(i)
+                if items["items_available"] > len(items["items"]):
+                    moreitems = True
             self.id = max_id
-            self.stop.wait(self.poll_time)
+            if not moreitems:
+                self.stop.wait(self.poll_time)
 
     def run_forever(self):
         # Have to poll here, otherwise KeyboardInterrupt will never get processed.
@@ -95,6 +113,8 @@ class PollClient(threading.Thread):
             self.stop.wait(1)
 
     def close(self):
+        """Close poll client and wait for it to finish."""
+
         self.stop.set()
         try:
             self.join()
@@ -113,23 +133,28 @@ class PollClient(threading.Thread):
         del self.filters[self.filters.index(filters)]
 
 
-def _subscribe_websocket(api, filters, on_event):
+def _subscribe_websocket(api, filters, on_event, last_log_id=None):
     endpoint = api._rootDesc.get('websocketUrl', None)
     if not endpoint:
         raise errors.FeatureNotEnabledError(
             "Server does not advertise a websocket endpoint")
-    uri_with_token = "{}?api_token={}".format(endpoint, api.api_token)
-    client = EventClient(uri_with_token, filters, on_event)
-    ok = False
     try:
-        client.connect()
-        ok = True
-        return client
-    finally:
-        if not ok:
-            client.close_connection()
-
-def subscribe(api, filters, on_event, poll_fallback=15):
+        uri_with_token = "{}?api_token={}".format(endpoint, api.api_token)
+        client = EventClient(uri_with_token, filters, on_event, last_log_id)
+        ok = False
+        try:
+            client.connect()
+            ok = True
+            return client
+        finally:
+            if not ok:
+                client.close_connection()
+    except:
+        _logger.warn("Failed to connect to websockets on %s" % endpoint)
+        raise
+
+
+def subscribe(api, filters, on_event, poll_fallback=15, last_log_id=None):
     """
     :api:
       a client object retrieved from arvados.api(). The caller should not use this client object for anything else after calling subscribe().
@@ -139,15 +164,17 @@ def subscribe(api, filters, on_event, poll_fallback=15):
       The callback when a message is received.
     :poll_fallback:
       If websockets are not available, fall back to polling every N seconds.  If poll_fallback=False, this will return None if websockets are not available.
+    :last_log_id:
+      If given, only deliver log events with ids greater than this value.
     """
 
     if not poll_fallback:
-        return _subscribe_websocket(api, filters, on_event)
+        return _subscribe_websocket(api, filters, on_event, last_log_id)
 
     try:
-        return _subscribe_websocket(api, filters, on_event)
+        return _subscribe_websocket(api, filters, on_event, last_log_id)
     except Exception as e:
         _logger.warn("Falling back to polling after websocket error: %s" % e)
-    p = PollClient(api, filters, on_event, poll_fallback)
+    p = PollClient(api, filters, on_event, poll_fallback, last_log_id)
     p.start()
     return p
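
# Hedged end-to-end sketch (not part of this patch; the uuid filter is a
# placeholder) of the new last_log_id parameter: resume delivery from a
# known log id, falling back to polling if websockets are unavailable.
import arvados
import arvados.events

def on_event(ev):
    print ev.get("event_type"), ev.get("object_uuid")

api = arvados.api('v1')
ws = arvados.events.subscribe(
    api, [['object_uuid', '=', 'zzzzz-8i9sb-xxxxxxxxxxxxxxx']],
    on_event, poll_fallback=15, last_log_id=12345)
try:
    ws.run_forever()
finally:
    ws.close()  # waits for the client thread to finish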
index 6196b502021a6036aa96c33d61c9bc6fb5d4f4f1..63b99daedd3d3931ac3822da62bff1d556d0806e 100644 (file)
@@ -1,25 +1,28 @@
+import bz2
+import datetime
+import fcntl
+import functools
 import gflags
+import hashlib
+import json
 import logging
 import os
 import pprint
-import sys
-import types
-import subprocess
-import json
-import UserDict
+import pycurl
+import Queue
 import re
-import hashlib
+import socket
+import ssl
 import string
-import bz2
-import zlib
-import fcntl
-import time
+import cStringIO
+import subprocess
+import sys
 import threading
+import time
 import timer
-import datetime
-import ssl
-import socket
-import requests
+import types
+import UserDict
+import zlib
 
 import arvados
 import arvados.config as config
@@ -27,25 +30,10 @@ import arvados.errors
 import arvados.retry as retry
 import arvados.util
 
-try:
-    # Workaround for urllib3 bug.
-    # The 'requests' library enables urllib3's SNI support by default, which uses pyopenssl.
-    # However, urllib3 prior to version 1.10 has a major bug in this feature
-    # (OpenSSL WantWriteError, https://github.com/shazow/urllib3/issues/412)
-    # Unfortunately Debian 8 is stabilizing on urllib3 1.9.1 which means the
-    # following workaround is necessary to be able to use
-    # the arvados python sdk with the distribution-provided packages.
-    import urllib3
-    from pkg_resources import parse_version
-    if parse_version(urllib3.__version__) < parse_version('1.10'):
-        from urllib3.contrib import pyopenssl
-        pyopenssl.extract_from_urllib3()
-except ImportError:
-    pass
-
 _logger = logging.getLogger('arvados.keep')
 global_client_object = None
 
+
 class KeepLocator(object):
     EPOCH_DATETIME = datetime.datetime.utcfromtimestamp(0)
     HINT_RE = re.compile(r'^[A-Z][A-Za-z0-9@_-]+$')
@@ -62,7 +50,7 @@ class KeepLocator(object):
             self.size = None
         for hint in pieces:
             if self.HINT_RE.match(hint) is None:
-                raise ValueError("unrecognized hint data {}".format(hint))
+                raise ValueError("invalid hint format: {}".format(hint))
             elif hint.startswith('A'):
                 self.parse_permission_hint(hint)
             else:
@@ -88,7 +76,7 @@ class KeepLocator(object):
             return getattr(self, data_name)
         def setter(self, hex_str):
             if not arvados.util.is_hex(hex_str, length):
-                raise ValueError("{} must be a {}-digit hex string: {}".
+                raise ValueError("{} is not a {}-digit hex string: {}".
                                  format(name, length, hex_str))
             setattr(self, data_name, hex_str)
         return property(getter, setter)
@@ -301,75 +289,216 @@ class KeepClient(object):
 
 
     class KeepService(object):
-        # Make requests to a single Keep service, and track results.
-        HTTP_ERRORS = (requests.exceptions.RequestException,
-                       socket.error, ssl.SSLError)
+        """Make requests to a single Keep service, and track results.
+
+        A KeepService is intended to last long enough to perform one
+        transaction (GET or PUT) against one Keep service. This can
+        involve calling either get() or put() multiple times in order
+        to retry after transient failures. However, calling both get()
+        and put() on a single instance -- or using the same instance
+        to access two different Keep services -- will not produce
+        sensible behavior.
+        """
+
+        HTTP_ERRORS = (
+            socket.error,
+            ssl.SSLError,
+            arvados.errors.HttpError,
+        )
 
-        def __init__(self, root, session, **headers):
+        def __init__(self, root, user_agent_pool=Queue.LifoQueue(), **headers):
             self.root = root
-            self.last_result = None
-            self.success_flag = None
-            self.session = session
+            self._user_agent_pool = user_agent_pool
+            self._result = {'error': None}
+            self._usable = True
+            self._session = None
             self.get_headers = {'Accept': 'application/octet-stream'}
             self.get_headers.update(headers)
             self.put_headers = headers
 
         def usable(self):
-            return self.success_flag is not False
+            """Is it worth attempting a request?"""
+            return self._usable
 
         def finished(self):
-            return self.success_flag is not None
+            """Did the request succeed or encounter permanent failure?"""
+            return self._result['error'] == False or not self._usable
+
+        def last_result(self):
+            return self._result
 
-        def last_status(self):
+        def _get_user_agent(self):
             try:
-                return self.last_result.status_code
-            except AttributeError:
-                return None
+                return self._user_agent_pool.get(False)
+            except Queue.Empty:
+                return pycurl.Curl()
+
+        def _put_user_agent(self, ua):
+            try:
+                ua.reset()
+                self._user_agent_pool.put(ua, False)
+            except:
+                ua.close()
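
# Standalone sketch (not part of this patch) of the same non-blocking pool
# pattern: get(False) reuses a warm pycurl handle when one is available,
# and put(False) returns it after reset(); anything else is discarded.
import Queue
import pycurl

pool = Queue.LifoQueue()

def checkout():
    try:
        return pool.get(False)   # reuse a warm handle
    except Queue.Empty:
        return pycurl.Curl()     # or create a fresh one

def checkin(curl):
    try:
        curl.reset()             # clear per-request state
        pool.put(curl, False)
    except:
        curl.close()             # broken handle: discard it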
+
+        @staticmethod
+        def _socket_open(family, socktype, protocol, address=None):
+            """Because pycurl doesn't have CURLOPT_TCP_KEEPALIVE"""
+            s = socket.socket(family, socktype, protocol)
+            s.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1)
+            s.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, 75)
+            s.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 75)
+            return s
 
         def get(self, locator, timeout=None):
             # locator is a KeepLocator object.
             url = self.root + str(locator)
             _logger.debug("Request: GET %s", url)
+            curl = self._get_user_agent()
             try:
                 with timer.Timer() as t:
-                    result = self.session.get(url.encode('utf-8'),
-                                          headers=self.get_headers,
-                                          timeout=timeout)
+                    self._headers = {}
+                    response_body = cStringIO.StringIO()
+                    curl.setopt(pycurl.NOSIGNAL, 1)
+                    curl.setopt(pycurl.OPENSOCKETFUNCTION, self._socket_open)
+                    curl.setopt(pycurl.URL, url.encode('utf-8'))
+                    curl.setopt(pycurl.HTTPHEADER, [
+                        '{}: {}'.format(k,v) for k,v in self.get_headers.iteritems()])
+                    curl.setopt(pycurl.WRITEFUNCTION, response_body.write)
+                    curl.setopt(pycurl.HEADERFUNCTION, self._headerfunction)
+                    self._setcurltimeouts(curl, timeout)
+                    try:
+                        curl.perform()
+                    except Exception as e:
+                        raise arvados.errors.HttpError(0, str(e))
+                    self._result = {
+                        'status_code': curl.getinfo(pycurl.RESPONSE_CODE),
+                        'body': response_body.getvalue(),
+                        'headers': self._headers,
+                        'error': False,
+                    }
+                ok = retry.check_http_response_success(self._result['status_code'])
+                if not ok:
+                    self._result['error'] = arvados.errors.HttpError(
+                        self._result['status_code'],
+                        self._headers.get('x-status-line', 'Error'))
             except self.HTTP_ERRORS as e:
-                _logger.debug("Request fail: GET %s => %s: %s",
-                              url, type(e), str(e))
-                self.last_result = e
+                self._result = {
+                    'error': e,
+                }
+                ok = False
+            self._usable = ok != False
+            if self._result.get('status_code', None):
+                # The client worked well enough to get an HTTP status
+                # code, so presumably any problems are just on the
+                # server side and it's OK to reuse the client.
+                self._put_user_agent(curl)
             else:
-                self.last_result = result
-                self.success_flag = retry.check_http_response_success(result)
-                content = result.content
-                _logger.info("%s response: %s bytes in %s msec (%.3f MiB/sec)",
-                             self.last_status(), len(content), t.msecs,
-                             (len(content)/(1024.0*1024))/t.secs if t.secs > 0 else 0)
-                if self.success_flag:
-                    resp_md5 = hashlib.md5(content).hexdigest()
-                    if resp_md5 == locator.md5sum:
-                        return content
-                    _logger.warning("Checksum fail: md5(%s) = %s",
-                                    url, resp_md5)
-            return None
+                # Don't return this client to the pool, in case it's
+                # broken.
+                curl.close()
+            if not ok:
+                _logger.debug("Request fail: GET %s => %s: %s",
+                              url, type(self._result['error']), str(self._result['error']))
+                return None
+            _logger.info("%s response: %s bytes in %s msec (%.3f MiB/sec)",
+                         self._result['status_code'],
+                         len(self._result['body']),
+                         t.msecs,
+                         (len(self._result['body'])/(1024.0*1024))/t.secs if t.secs > 0 else 0)
+            resp_md5 = hashlib.md5(self._result['body']).hexdigest()
+            if resp_md5 != locator.md5sum:
+                _logger.warning("Checksum fail: md5(%s) = %s",
+                                url, resp_md5)
+                self._result['error'] = arvados.errors.HttpError(
+                    0, 'Checksum fail')
+                return None
+            return self._result['body']
 
         def put(self, hash_s, body, timeout=None):
             url = self.root + hash_s
             _logger.debug("Request: PUT %s", url)
+            curl = self._get_user_agent()
             try:
-                result = self.session.put(url.encode('utf-8'),
-                                      data=body,
-                                      headers=self.put_headers,
-                                      timeout=timeout)
+                self._headers = {}
+                body_reader = cStringIO.StringIO(body)
+                response_body = cStringIO.StringIO()
+                curl.setopt(pycurl.NOSIGNAL, 1)
+                curl.setopt(pycurl.OPENSOCKETFUNCTION, self._socket_open)
+                curl.setopt(pycurl.URL, url.encode('utf-8'))
+                # Using UPLOAD tells cURL to wait for a "go ahead" from the
+                # Keep server (in the form of an HTTP/1.1 "100 Continue"
+                # response) instead of sending the request body immediately.
+                # This allows the server to reject the request if it is
+                # invalid or the server is read-only, without waiting for
+                # the client to send the entire block.
+                curl.setopt(pycurl.UPLOAD, True)
+                curl.setopt(pycurl.INFILESIZE, len(body))
+                curl.setopt(pycurl.READFUNCTION, body_reader.read)
+                curl.setopt(pycurl.HTTPHEADER, [
+                    '{}: {}'.format(k,v) for k,v in self.put_headers.iteritems()])
+                curl.setopt(pycurl.WRITEFUNCTION, response_body.write)
+                curl.setopt(pycurl.HEADERFUNCTION, self._headerfunction)
+                self._setcurltimeouts(curl, timeout)
+                try:
+                    curl.perform()
+                except Exception as e:
+                    raise arvados.errors.HttpError(0, str(e))
+                self._result = {
+                    'status_code': curl.getinfo(pycurl.RESPONSE_CODE),
+                    'body': response_body.getvalue(),
+                    'headers': self._headers,
+                    'error': False,
+                }
+                ok = retry.check_http_response_success(self._result['status_code'])
+                if not ok:
+                    self._result['error'] = arvados.errors.HttpError(
+                        self._result['status_code'],
+                        self._headers.get('x-status-line', 'Error'))
             except self.HTTP_ERRORS as e:
+                self._result = {
+                    'error': e,
+                }
+                ok = False
+            self._usable = ok != False # still usable if ok is True or None
+            if self._result.get('status_code', None):
+                # Client is functional. See comment in get().
+                self._put_user_agent(curl)
+            else:
+                curl.close()
+            if not ok:
                 _logger.debug("Request fail: PUT %s => %s: %s",
-                              url, type(e), str(e))
-                self.last_result = e
+                              url, type(self._result['error']), str(self._result['error']))
+                return False
+            return True
+
+        def _setcurltimeouts(self, curl, timeouts):
+            if not timeouts:
+                return
+            elif isinstance(timeouts, tuple):
+                conn_t, xfer_t = timeouts
+            else:
+                conn_t, xfer_t = (timeouts, timeouts)
+            curl.setopt(pycurl.CONNECTTIMEOUT_MS, int(conn_t*1000))
+            curl.setopt(pycurl.TIMEOUT_MS, int(xfer_t*1000))
+
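As _setcurltimeouts shows, a tuple timeout splits into separate connect and transfer limits, while a single number covers both and None leaves libcurl's defaults in place. A quick sketch of the resulting options (values illustrative):

    import pycurl

    curl = pycurl.Curl()
    # timeout=(2, 300) becomes:
    curl.setopt(pycurl.CONNECTTIMEOUT_MS, 2000)   # 2s to establish the connection
    curl.setopt(pycurl.TIMEOUT_MS, 300000)        # 300s for the whole transfer
    # timeout=20 would set both options to 20000 ms;
    # timeout=None sets neither, leaving libcurl's defaults.
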
+        def _headerfunction(self, header_line):
+            header_line = header_line.decode('iso-8859-1')
+            if ':' in header_line:
+                name, value = header_line.split(':', 1)
+                name = name.strip().lower()
+                value = value.strip()
+            elif self._headers:
+                name = self._lastheadername
+                value = self._headers[name] + ' ' + header_line.strip()
+            elif header_line.startswith('HTTP/'):
+                name = 'x-status-line'
+                value = header_line
             else:
-                self.last_result = result
-                self.success_flag = retry.check_http_response_success(result)
-            return self.success_flag
+                _logger.error("Unexpected header line: %s", header_line)
+                return
+            self._lastheadername = name
+            self._headers[name] = value
+            # Returning None implies all bytes were written
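Fed one raw header line at a time, this callback lowercases header names, folds continuation lines (lines starting with whitespace) onto the previous header, and records the status line under the synthetic key 'x-status-line', which get() and put() later use in error messages. The same parsing, sketched as a standalone helper for illustration:

    class HeaderCollector(object):
        # Follows the same rules as KeepService._headerfunction.
        def __init__(self):
            self.headers = {}
            self._last = None

        def __call__(self, line):
            line = line.decode('iso-8859-1')
            if ':' in line:
                name, _, value = line.partition(':')
                name, value = name.strip().lower(), value.strip()
            elif self.headers:
                # Continuation line: append to the previous header.
                name = self._last
                value = self.headers[name] + ' ' + line.strip()
            elif line.startswith('HTTP/'):
                name, value = 'x-status-line', line
            else:
                return
            self._last = name
            self.headers[name] = value

    h = HeaderCollector()
    for line in ('HTTP/1.1 200 OK', 'X-Keep-Replicas-Stored: 2'):
        h(line)
    assert h.headers['x-status-line'] == 'HTTP/1.1 200 OK'
    assert h.headers['x-keep-replicas-stored'] == '2'
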
 
 
     class KeepWriterThread(threading.Thread):
@@ -407,9 +536,8 @@ class KeepClient(object):
                 self.args['data_hash'],
                 self.args['data'],
                 timeout=self.args.get('timeout', None)))
-            status = self.service.last_status()
+            result = self.service.last_result()
             if self._success:
-                result = self.service.last_result
                 _logger.debug("KeepWriterThread %s succeeded %s+%i %s",
                               str(threading.current_thread()),
                               self.args['data_hash'],
@@ -420,14 +548,15 @@ class KeepClient(object):
                 # we're talking to a proxy or other backend that
                 # stores to multiple copies for us.
                 try:
-                    replicas_stored = int(result.headers['x-keep-replicas-stored'])
+                    replicas_stored = int(result['headers']['x-keep-replicas-stored'])
                 except (KeyError, ValueError):
                     replicas_stored = 1
-                limiter.save_response(result.content.strip(), replicas_stored)
-            elif status is not None:
+                limiter.save_response(result['body'].strip(), replicas_stored)
+            elif result.get('status_code', None):
                 _logger.debug("Request fail: PUT %s => %s %s",
-                              self.args['data_hash'], status,
-                              self.service.last_result.content)
+                              self.args['data_hash'],
+                              result['status_code'],
+                              result['body'])
 
 
     def __init__(self, api_client=None, proxy=None,
@@ -484,10 +613,6 @@ class KeepClient(object):
           The default number of times to retry failed requests.
           This will be used as the default num_retries value when get() and
           put() are called.  Default 0.
-
-        :session:
-          The requests.Session object to use for get() and put() requests.
-          Will create one if not specified.
         """
         self.lock = threading.Lock()
         if proxy is None:
@@ -506,6 +631,7 @@ class KeepClient(object):
         self.block_cache = block_cache if block_cache else KeepBlockCache()
         self.timeout = timeout
         self.proxy_timeout = proxy_timeout
+        self._user_agent_pool = Queue.LifoQueue()
 
         if local_store:
             self.local_store = local_store
@@ -513,15 +639,16 @@ class KeepClient(object):
             self.put = self.local_store_put
         else:
             self.num_retries = num_retries
-            self.session = session if session is not None else requests.Session()
             if proxy:
                 if not proxy.endswith('/'):
                     proxy += '/'
                 self.api_token = api_token
+                self._gateway_services = {}
                 self._keep_services = [{
                     'uuid': 'proxy',
                     '_service_root': proxy,
                     }]
+                self._writable_services = self._keep_services
                 self.using_proxy = True
                 self._static_services_list = True
             else:
@@ -531,7 +658,9 @@ class KeepClient(object):
                     api_client = arvados.api('v1')
                 self.api_client = api_client
                 self.api_token = api_client.api_token
+                self._gateway_services = {}
                 self._keep_services = None
+                self._writable_services = None
                 self.using_proxy = None
                 self._static_services_list = False
 
@@ -560,21 +689,38 @@ class KeepClient(object):
             except Exception:  # API server predates Keep services.
                 keep_services = self.api_client.keep_disks().list()
 
-            self._keep_services = keep_services.execute().get('items')
-            if not self._keep_services:
+            accessible = keep_services.execute().get('items')
+            if not accessible:
                 raise arvados.errors.NoKeepServersError()
 
-            self.using_proxy = any(ks.get('service_type') == 'proxy'
-                                   for ks in self._keep_services)
-
             # Precompute the base URI for each service.
-            for r in self._keep_services:
-                r['_service_root'] = "{}://[{}]:{:d}/".format(
+            for r in accessible:
+                host = r['service_host']
+                if not host.startswith('[') and host.find(':') >= 0:
+                    # IPv6 URIs must be formatted like http://[::1]:80/...
+                    host = '[' + host + ']'
+                r['_service_root'] = "{}://{}:{:d}/".format(
                     'https' if r['service_ssl_flag'] else 'http',
-                    r['service_host'],
+                    host,
                     r['service_port'])
+
+            # Gateway services are only used when specified by UUID,
+            # so there's nothing to gain by filtering them by
+            # service_type.
+            self._gateway_services = {ks.get('uuid'): ks for ks in accessible}
+            _logger.debug(str(self._gateway_services))
+
+            self._keep_services = [
+                ks for ks in accessible
+                if ks.get('service_type') in ['disk', 'proxy']]
+            self._writable_services = [
+                ks for ks in accessible
+                if (ks.get('service_type') in ['disk', 'proxy']) and (True != ks.get('read_only'))]
             _logger.debug(str(self._keep_services))
 
+            self.using_proxy = any(ks.get('service_type') == 'proxy'
+                                   for ks in self._keep_services)
+
     def _service_weight(self, data_hash, service_uuid):
         """Compute the weight of a Keep service endpoint for a data
         block with a known hash.
@@ -584,34 +730,53 @@ class KeepClient(object):
         """
         return hashlib.md5(data_hash + service_uuid[-15:]).hexdigest()
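This weight is rendezvous (highest-random-weight) hashing: every client, given the same block hash and service list, computes the same ranking, so readers probe the same servers writers targeted without any coordination. A worked sketch (UUIDs made up):

    import hashlib

    def service_weight(data_hash, service_uuid):
        # Same recipe as KeepClient._service_weight.
        return hashlib.md5(data_hash + service_uuid[-15:]).hexdigest()

    block = hashlib.md5('example block').hexdigest()
    uuids = ['zzzzz-bi6l4-000000000000001',
             'zzzzz-bi6l4-000000000000002',
             'zzzzz-bi6l4-000000000000003']
    # Heaviest first: the probe order every client independently agrees on.
    for uuid in sorted(uuids, key=lambda u: service_weight(block, u), reverse=True):
        print uuid
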
 
-    def weighted_service_roots(self, data_hash, force_rebuild=False):
+    def weighted_service_roots(self, locator, force_rebuild=False, need_writable=False):
         """Return an array of Keep service endpoints, in the order in
         which they should be probed when reading or writing data with
-        the given hash.
+        the given hash+hints.
         """
         self.build_services_list(force_rebuild)
 
-        # Sort the available services by weight (heaviest first) for
-        # this data_hash, and return their service_roots (base URIs)
+        sorted_roots = []
+
+        # Use the services indicated by the given +K@... remote
+        # service hints, if any are present and can be resolved to a
+        # URI.
+        for hint in locator.hints:
+            if hint.startswith('K@'):
+                if len(hint) == 7:
+                    sorted_roots.append(
+                        "https://keep.{}.arvadosapi.com/".format(hint[2:]))
+                elif len(hint) == 29:
+                    svc = self._gateway_services.get(hint[2:])
+                    if svc:
+                        sorted_roots.append(svc['_service_root'])
+
+        # Sort the available local services by weight (heaviest first)
+        # for this locator, and return their service_roots (base URIs)
         # in that order.
-        sorted_roots = [
+        use_services = self._keep_services
+        if need_writable:
+            use_services = self._writable_services
+        sorted_roots.extend([
             svc['_service_root'] for svc in sorted(
-                self._keep_services,
+                use_services,
                 reverse=True,
-                key=lambda svc: self._service_weight(data_hash, svc['uuid']))]
-        _logger.debug(data_hash + ': ' + str(sorted_roots))
+                key=lambda svc: self._service_weight(locator.md5sum, svc['uuid']))])
+        _logger.debug("{}: {}".format(locator, sorted_roots))
         return sorted_roots
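The hint handling follows the locator syntax: 'K@' plus a 5-character cluster prefix (hint length 7) resolves to that cluster's public keepproxy, while 'K@' plus a 27-character service UUID (hint length 29) is looked up in _gateway_services. A sketch with made-up identifiers:

    loc_s = 'd41d8cd98f00b204e9800998ecf8427e+0+K@qr1hi+K@zzzzz-bi6l4-000000000000001'
    for hint in loc_s.split('+')[2:]:
        if hint.startswith('K@') and len(hint) == 7:
            print 'remote proxy: https://keep.{}.arvadosapi.com/'.format(hint[2:])
        elif hint.startswith('K@') and len(hint) == 29:
            print 'gateway service UUID: {}'.format(hint[2:])
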
 
-    def map_new_services(self, roots_map, md5_s, force_rebuild, **headers):
+    def map_new_services(self, roots_map, locator, force_rebuild, need_writable, **headers):
         # roots_map is a dictionary, mapping Keep service root strings
         # to KeepService objects.  Poll for Keep services, and add any
         # new ones to roots_map.  Return the current list of local
         # root strings.
         headers.setdefault('Authorization', "OAuth2 %s" % (self.api_token,))
-        local_roots = self.weighted_service_roots(md5_s, force_rebuild)
+        local_roots = self.weighted_service_roots(locator, force_rebuild, need_writable)
         for root in local_roots:
             if root not in roots_map:
-                roots_map[root] = self.KeepService(root, self.session, **headers)
+                roots_map[root] = self.KeepService(
+                    root, self._user_agent_pool, **headers)
         return local_roots
 
     @staticmethod
@@ -635,7 +800,7 @@ class KeepClient(object):
     def get_from_cache(self, loc):
         """Fetch a block only if is in the cache, otherwise return None."""
         slot = self.block_cache.get(loc)
-        if slot.ready.is_set():
+        if slot is not None and slot.ready.is_set():
             return slot.get()
         else:
             return None
@@ -664,29 +829,45 @@ class KeepClient(object):
         if ',' in loc_s:
             return ''.join(self.get(x) for x in loc_s.split(','))
         locator = KeepLocator(loc_s)
-        expect_hash = locator.md5sum
-        slot, first = self.block_cache.reserve_cache(expect_hash)
+        slot, first = self.block_cache.reserve_cache(locator.md5sum)
         if not first:
             v = slot.get()
             return v
 
+        # If the locator has hints specifying a prefix (indicating a
+        # remote keepproxy) or the UUID of a local gateway service,
+        # read data from the indicated service(s) instead of the usual
+        # list of local disk services.
+        hint_roots = ['http://keep.{}.arvadosapi.com/'.format(hint[2:])
+                      for hint in locator.hints if hint.startswith('K@') and len(hint) == 7]
+        hint_roots.extend([self._gateway_services[hint[2:]]['_service_root']
+                           for hint in locator.hints if (
+                                   hint.startswith('K@') and
+                                   len(hint) == 29 and
+                                   self._gateway_services.get(hint[2:])
+                                   )])
+        # Map root URLs to their KeepService objects.
+        roots_map = {
+            root: self.KeepService(root, self._user_agent_pool)
+            for root in hint_roots
+        }
+
         # See #3147 for a discussion of the loop implementation.  Highlights:
         # * Refresh the list of Keep services after each failure, in case
         #   it's being updated.
         # * Retry until we succeed, we're out of retries, or every available
         #   service has returned permanent failure.
-        hint_roots = ['http://keep.{}.arvadosapi.com/'.format(hint[2:])
-                      for hint in locator.hints if hint.startswith('K@')]
-        # Map root URLs their KeepService objects.
-        roots_map = {root: self.KeepService(root, self.session) for root in hint_roots}
+        sorted_roots = []
+        roots_map = {}
         blob = None
         loop = retry.RetryLoop(num_retries, self._check_loop_result,
                                backoff_start=2)
         for tries_left in loop:
             try:
-                local_roots = self.map_new_services(
-                    roots_map, expect_hash,
-                    force_rebuild=(tries_left < num_retries))
+                sorted_roots = self.map_new_services(
+                    roots_map, locator,
+                    force_rebuild=(tries_left < num_retries),
+                    need_writable=False)
             except Exception as error:
                 loop.save_result(error)
                 continue
@@ -694,7 +875,7 @@ class KeepClient(object):
             # Query KeepService objects that haven't returned
             # permanent failure, in our specified shuffle order.
             services_to_try = [roots_map[root]
-                               for root in (local_roots + hint_roots)
+                               for root in sorted_roots
                                if roots_map[root].usable()]
             for keep_service in services_to_try:
                 blob = keep_service.get(locator, timeout=self.current_timeout(num_retries-tries_left))
@@ -708,22 +889,17 @@ class KeepClient(object):
         if loop.success():
             return blob
 
-        try:
-            all_roots = local_roots + hint_roots
-        except NameError:
-            # We never successfully fetched local_roots.
-            all_roots = hint_roots
         # Q: Including 403 is necessary for the Keep tests to continue
         # passing, but maybe they should expect KeepReadError instead?
-        not_founds = sum(1 for key in all_roots
-                         if roots_map[key].last_status() in {403, 404, 410})
-        service_errors = ((key, roots_map[key].last_result)
-                          for key in all_roots)
+        not_founds = sum(1 for key in sorted_roots
+                         if roots_map[key].last_result().get('status_code', None) in {403, 404, 410})
+        service_errors = ((key, roots_map[key].last_result()['error'])
+                          for key in sorted_roots)
         if not roots_map:
             raise arvados.errors.KeepReadError(
                 "failed to read {}: no Keep services available ({})".format(
                     loc_s, loop.last_result()))
-        elif not_founds == len(all_roots):
+        elif not_founds == len(sorted_roots):
             raise arvados.errors.NotFoundError(
                 "{} not found".format(loc_s), service_errors)
         else:
@@ -753,11 +929,13 @@ class KeepClient(object):
         if isinstance(data, unicode):
             data = data.encode("ascii")
         elif not isinstance(data, str):
-            raise arvados.errors.ArgumentError("Argument 'data' to KeepClient.put must be type 'str'")
+            raise arvados.errors.ArgumentError("Argument 'data' to KeepClient.put is not type 'str'")
 
         data_hash = hashlib.md5(data).hexdigest()
+        loc_s = data_hash + '+' + str(len(data))
         if copies < 1:
-            return data_hash
+            return loc_s
+        locator = KeepLocator(loc_s)
 
         headers = {}
         if self.using_proxy:
@@ -770,8 +948,8 @@ class KeepClient(object):
         for tries_left in loop:
             try:
                 local_roots = self.map_new_services(
-                    roots_map, data_hash,
-                    force_rebuild=(tries_left < num_retries), **headers)
+                    roots_map, locator,
+                    force_rebuild=(tries_left < num_retries), need_writable=True, **headers)
             except Exception as error:
                 loop.save_result(error)
                 continue
@@ -800,9 +978,9 @@ class KeepClient(object):
                 "failed to write {}: no Keep services available ({})".format(
                     data_hash, loop.last_result()))
         else:
-            service_errors = ((key, roots_map[key].last_result)
+            service_errors = ((key, roots_map[key].last_result()['error'])
                               for key in local_roots
-                              if not roots_map[key].success_flag)
+                              if roots_map[key].last_result()['error'])
             raise arvados.errors.KeepWriteError(
                 "failed to write {} (wanted {} copies but wrote {})".format(
                     data_hash, copies, thread_limiter.done()), service_errors, label="service")
index 52a68faa6f6b511bf78378cc944aa6e4c5914c33..d8f5317d2c4c160c833339929302edaae679d6f5 100644 (file)
@@ -2,6 +2,7 @@
 
 import functools
 import inspect
+import pycurl
 import time
 
 from collections import deque
@@ -109,11 +110,11 @@ class RetryLoop(object):
                 "queried loop results before any were recorded")
 
 
-def check_http_response_success(result):
-    """Convert a 'requests' response to a loop control flag.
+def check_http_response_success(status_code):
+    """Convert an HTTP status code to a loop control flag.
 
-    Pass this method a requests.Response object.  It returns True if
-    the response indicates success, None if it indicates temporary
+    Pass this method a numeric HTTP status code.  It returns True if
+    the code indicates success, None if it indicates temporary
     failure, and False otherwise.  You can use this as the
     success_check for a RetryLoop.
 
@@ -128,15 +129,11 @@ def check_http_response_success(result):
       3xx status codes.  They don't indicate success, and you can't
       retry those requests verbatim.
     """
-    try:
-        status = result.status_code
-    except Exception:
-        return None
-    if status in _HTTP_SUCCESSES:
+    if status_code in _HTTP_SUCCESSES:
         return True
-    elif status in _HTTP_CAN_RETRY:
+    elif status_code in _HTTP_CAN_RETRY:
         return None
-    elif 100 <= status < 600:
+    elif 100 <= status_code < 600:
         return False
     else:
         return None  # Get well soon, server.
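With the requests dependency gone, the checker is a pure function from numeric status code to RetryLoop flag. A few illustrative mappings (the 500 line assumes _HTTP_CAN_RETRY includes server errors such as 500):

    assert check_http_response_success(200) is True    # 2xx: success, stop looping
    assert check_http_response_success(404) is False   # permanent failure, give up
    assert check_http_response_success(500) is None    # temporary failure, retry
    assert check_http_response_success(700) is None    # out-of-range code: retry
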
@@ -151,8 +148,7 @@ def retry_method(orig_func):
     """
     @functools.wraps(orig_func)
     def num_retries_setter(self, *args, **kwargs):
-        arg_vals = inspect.getcallargs(orig_func, self, *args, **kwargs)
-        if arg_vals['num_retries'] is None:
+        if kwargs.get('num_retries') is None:
             kwargs['num_retries'] = self.num_retries
         return orig_func(self, *args, **kwargs)
     return num_retries_setter
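The simplified decorator reads num_retries straight from kwargs instead of reconstructing the full call signature with inspect.getcallargs; a side effect is that the instance default now applies only when num_retries is omitted or passed by keyword. A minimal usage sketch:

    class Client(object):
        def __init__(self):
            self.num_retries = 3

        @retry_method
        def fetch(self, what, num_retries=None):
            return (what, num_retries)

    c = Client()
    print c.fetch('x')                  # ('x', 3): instance default filled in
    print c.fetch('x', num_retries=1)   # ('x', 1): explicit value wins
    # c.fetch('x', 1) would now raise TypeError (num_retries passed twice),
    # so callers must pass num_retries by keyword.
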
index 3a42aa010112a7e52d36319ff15558d7ff6298f0..afc202e1806cd9c5ce32ce4aa8e09777d527be0b 100644 (file)
@@ -35,7 +35,7 @@ class StreamReader(object):
             s = re.match(r'^[0-9a-f]{32}\+(\d+)(\+\S+)*$', tok)
             if s:
                 blocksize = long(s.group(1))
-                self._data_locators.append(Range(tok, streamoffset, blocksize))
+                self._data_locators.append(Range(tok, streamoffset, blocksize, 0))
                 streamoffset += blocksize
                 continue
 
@@ -45,7 +45,7 @@ class StreamReader(object):
                 size = long(s.group(2))
                 name = s.group(3).replace('\\040', ' ')
                 if name not in self._files:
-                    self._files[name] = StreamFileReader(self, [Range(pos, 0, size)], name)
+                    self._files[name] = StreamFileReader(self, [Range(pos, 0, size, 0)], name)
                 else:
                     filereader = self._files[name]
                     filereader.segments.append(Range(pos, filereader.size(), size))
index 79692ae7084827257844b1a7d268cdaf667d3629..3e59bfb861cbb3e135990f8ba3aa186070a0a5bc 100644 (file)
@@ -1,11 +1,14 @@
 import fcntl
 import hashlib
+import httplib2
 import os
 import re
 import subprocess
 import errno
 import sys
-from arvados.collection import *
+
+import arvados
+from arvados.collection import CollectionReader
 
 HEX_RE = re.compile(r'^[0-9a-fA-F]+$')
 
@@ -42,7 +45,7 @@ def run_command(execargs, **kwargs):
     p = subprocess.Popen(execargs, **kwargs)
     stdoutdata, stderrdata = p.communicate(None)
     if p.returncode != 0:
-        raise errors.CommandFailedError(
+        raise arvados.errors.CommandFailedError(
             "run_command %s exit %d:\n%s" %
             (execargs, p.returncode, stderrdata))
     return stdoutdata, stderrdata
@@ -107,7 +110,7 @@ def tarball_extract(tarball, path):
             elif re.search('\.tar$', f.name()):
                 p = tar_extractor(path, '')
             else:
-                raise errors.AssertionError(
+                raise arvados.errors.AssertionError(
                     "tarball_extract cannot handle filename %s" % f.name())
             while True:
                 buf = f.read(2**20)
@@ -118,7 +121,7 @@ def tarball_extract(tarball, path):
             p.wait()
             if p.returncode != 0:
                 lockfile.close()
-                raise errors.CommandFailedError(
+                raise arvados.errors.CommandFailedError(
                     "tar exited %d" % p.returncode)
         os.symlink(tarball, os.path.join(path, '.locator'))
     tld_extracts = filter(lambda f: f != '.locator', os.listdir(path))
@@ -162,7 +165,7 @@ def zipball_extract(zipball, path):
 
         for f in CollectionReader(zipball).all_files():
             if not re.search('\.zip$', f.name()):
-                raise errors.NotImplementedError(
+                raise arvados.errors.NotImplementedError(
                     "zipball_extract cannot handle filename %s" % f.name())
             zip_filename = os.path.join(path, os.path.basename(f.name()))
             zip_file = open(zip_filename, 'wb')
@@ -183,7 +186,7 @@ def zipball_extract(zipball, path):
             p.wait()
             if p.returncode != 0:
                 lockfile.close()
-                raise errors.CommandFailedError(
+                raise arvados.errors.CommandFailedError(
                     "unzip exited %d" % p.returncode)
             os.unlink(zip_filename)
         os.symlink(zipball, os.path.join(path, '.locator'))
@@ -247,7 +250,7 @@ def collection_extract(collection, path, files=[], decompress=True):
                     outfile.write(buf)
                 outfile.close()
     if len(files_got) < len(files):
-        raise errors.AssertionError(
+        raise arvados.errors.AssertionError(
             "Wanted files %s but only got %s from %s" %
             (files, files_got,
              [z.name() for z in CollectionReader(collection).all_files()]))
@@ -302,7 +305,7 @@ def stream_extract(stream, path, files=[], decompress=True):
                 outfile.write(buf)
             outfile.close()
     if len(files_got) < len(files):
-        raise errors.AssertionError(
+        raise arvados.errors.AssertionError(
             "Wanted files %s but only got %s from %s" %
             (files, files_got, [z.name() for z in stream.all_files()]))
     lockfile.close()
@@ -349,8 +352,8 @@ def is_hex(s, *length_args):
     """
     num_length_args = len(length_args)
     if num_length_args > 2:
-        raise errors.ArgumentError("is_hex accepts up to 3 arguments ({} given)"
-                                   .format(1 + num_length_args))
+        raise arvados.errors.ArgumentError(
+            "is_hex accepts up to 3 arguments ({} given)".format(1 + num_length_args))
     elif num_length_args == 2:
         good_len = (length_args[0] <= len(s) <= length_args[1])
     elif num_length_args == 1:
@@ -369,3 +372,20 @@ def list_all(fn, num_retries=0, **kwargs):
         items_available = c['items_available']
         offset = c['offset'] + len(c['items'])
     return items
+
+def ca_certs_path(fallback=httplib2.CA_CERTS):
+    """Return the path of the best available CA certs source.
+
+    This function searches for various distribution sources of CA
+    certificates, and returns the first it finds.  If it doesn't find any,
+    it returns the value of `fallback` (httplib2's CA certs by default).
+    """
+    for ca_certs_path in [
+        # Debian:
+        '/etc/ssl/certs/ca-certificates.crt',
+        # Red Hat:
+        '/etc/pki/tls/certs/ca-bundle.crt',
+        ]:
+        if os.path.exists(ca_certs_path):
+            return ca_certs_path
+    return fallback
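A caller can hand the result to whatever HTTP client it drives; with pycurl, for instance, that would be the CAINFO option (an illustrative snippet, not a call site from this change):

    import pycurl
    from arvados.util import ca_certs_path

    curl = pycurl.Curl()
    curl.setopt(pycurl.CAINFO, ca_certs_path())  # verify TLS against the system CA bundle
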
index 2451416dae38da1932f22fb7c6599b8a82e55110..60d4bec3b95c429643d7df4a600f72754954809a 100755 (executable)
@@ -24,10 +24,9 @@ parser.add_argument('locator', type=str,
                     help="""
 Collection locator, optionally with a file path or prefix.
 """)
-parser.add_argument('destination', type=str, nargs='?', default='/dev/stdout',
+parser.add_argument('destination', type=str, nargs='?', default='-',
                     help="""
-Local file or directory where the data is to be written. Default:
-/dev/stdout.
+Local file or directory where the data is to be written. Default: stdout.
 """)
 group = parser.add_mutually_exclusive_group()
 group.add_argument('--progress', action='store_true',
@@ -74,7 +73,7 @@ group.add_argument('-f', action='store_true',
                    help="""
 Overwrite existing files while writing. The default behavior is to
 refuse to write *anything* if any of the output files already
-exist. As a special case, -f is not needed to write to /dev/stdout.
+exist. As a special case, -f is not needed to write to stdout.
 """)
 group.add_argument('--skip-existing', action='store_true',
                    help="""
@@ -100,9 +99,10 @@ if not args.r and (os.path.isdir(args.destination) or
     logger.debug("Appended source file name to destination directory: %s",
                  args.destination)
 
-if args.destination == '-':
-    args.destination = '/dev/stdout'
 if args.destination == '/dev/stdout':
+    args.destination = "-"
+
+if args.destination == '-':
     # Normally you have to use -f to write to a file (or device) that
     # already exists, but "-" and "/dev/stdout" are common enough to
     # merit a special exception.
@@ -115,7 +115,7 @@ else:
 # that isn't a tty.
 if (not (args.batch_progress or args.no_progress)
     and sys.stderr.isatty()
-    and (args.destination != '/dev/stdout'
+    and (args.destination != '-'
          or not sys.stdout.isatty())):
     args.progress = True
 
@@ -134,9 +134,12 @@ if not get_prefix:
         if not args.f:
             open_flags |= os.O_EXCL
         try:
-            out_fd = os.open(args.destination, open_flags)
-            with os.fdopen(out_fd, 'wb') as out_file:
-                out_file.write(reader.manifest_text())
+            if args.destination == "-":
+                sys.stdout.write(reader.manifest_text())
+            else:
+                out_fd = os.open(args.destination, open_flags)
+                with os.fdopen(out_fd, 'wb') as out_file:
+                    out_file.write(reader.manifest_text())
         except (IOError, OSError) as error:
             abort("can't write to '{}': {}".format(args.destination, error))
         except (arvados.errors.ApiError, arvados.errors.KeepReadError) as error:
@@ -156,12 +159,15 @@ try:
                 if 0 != string.find(os.path.join(s.name(), f.name()),
                                     '.' + get_prefix):
                     continue
-                dest_path = os.path.join(
-                    args.destination,
-                    os.path.join(s.name(), f.name())[len(get_prefix)+1:])
-                if (not (args.n or args.f or args.skip_existing) and
-                    os.path.exists(dest_path)):
-                    abort('Local file %s already exists.' % (dest_path,))
+                if args.destination == "-":
+                    dest_path = "-"
+                else:
+                    dest_path = os.path.join(
+                        args.destination,
+                        os.path.join(s.name(), f.name())[len(get_prefix)+1:])
+                    if (not (args.n or args.f or args.skip_existing) and
+                        os.path.exists(dest_path)):
+                        abort('Local file %s already exists.' % (dest_path,))
             else:
                 if os.path.join(s.name(), f.name()) != '.' + get_prefix:
                     continue
@@ -178,20 +184,23 @@ for s,f,outfilename in todo:
     outfile = None
     digestor = None
     if not args.n:
-        if args.skip_existing and os.path.exists(outfilename):
-            logger.debug('Local file %s exists. Skipping.', outfilename)
-            continue
-        elif not args.f and (os.path.isfile(outfilename) or
-                           os.path.isdir(outfilename)):
-            # Good thing we looked again: apparently this file wasn't
-            # here yet when we checked earlier.
-            abort('Local file %s already exists.' % (outfilename,))
-        if args.r:
-            arvados.util.mkdir_dash_p(os.path.dirname(outfilename))
-        try:
-            outfile = open(outfilename, 'wb')
-        except Exception as error:
-            abort('Open(%s) failed: %s' % (outfilename, error))
+        if outfilename == "-":
+            outfile = sys.stdout
+        else:
+            if args.skip_existing and os.path.exists(outfilename):
+                logger.debug('Local file %s exists. Skipping.', outfilename)
+                continue
+            elif not args.f and (os.path.isfile(outfilename) or
+                               os.path.isdir(outfilename)):
+                # Good thing we looked again: apparently this file wasn't
+                # here yet when we checked earlier.
+                abort('Local file %s already exists.' % (outfilename,))
+            if args.r:
+                arvados.util.mkdir_dash_p(os.path.dirname(outfilename))
+            try:
+                outfile = open(outfilename, 'wb')
+            except Exception as error:
+                abort('Open(%s) failed: %s' % (outfilename, error))
     if args.hash:
         digestor = hashlib.new(args.hash)
     try:
@@ -216,8 +225,8 @@ for s,f,outfilename in todo:
             sys.stderr.write("%s  %s/%s\n"
                              % (digestor.hexdigest(), s.name(), f.name()))
     except KeyboardInterrupt:
-        if outfile and outfilename != '/dev/stdout':
-            os.unlink(outfilename)
+        if outfile and (outfile.fileno() > 2) and not outfile.closed:
+            os.unlink(outfile.name)
         break
 
 if args.progress:
index ca28025fea64ae6b388af4388337814494a8c0c8..5c0b09d73096874c0d9e86907cf0fde873dadaa7 100644 (file)
@@ -26,23 +26,23 @@ setup(name='arvados-python-client',
       license='Apache 2.0',
       packages=find_packages(),
       scripts=[
-        'bin/arv-copy',
-        'bin/arv-get',
-        'bin/arv-keepdocker',
-        'bin/arv-ls',
-        'bin/arv-normalize',
-        'bin/arv-put',
-        'bin/arv-run',
-        'bin/arv-ws'
-        ],
+          'bin/arv-copy',
+          'bin/arv-get',
+          'bin/arv-keepdocker',
+          'bin/arv-ls',
+          'bin/arv-normalize',
+          'bin/arv-put',
+          'bin/arv-run',
+          'bin/arv-ws'
+      ],
       install_requires=[
-        'python-gflags',
-        'google-api-python-client',
-        'httplib2',
-        'requests>=2.4',
-        'urllib3',
-        'ws4py'
-        ],
+          'ciso8601',
+          'google-api-python-client',
+          'httplib2',
+          'pycurl>=7.19.5.1',
+          'python-gflags',
+          'ws4py'
+      ],
       test_suite='tests',
       tests_require=['mock>=1.0', 'PyYAML'],
       zip_safe=False,
index 644dfffbaca0657a934a43cf0742e03cc227f62b..6e2a07888662172fd6f26b335a0206db4ef15e98 100644 (file)
@@ -8,8 +8,8 @@ import httplib2
 import io
 import mock
 import os
+import pycurl
 import Queue
-import requests
 import shutil
 import tempfile
 import unittest
@@ -43,44 +43,80 @@ def mock_responses(body, *codes, **headers):
     return mock.patch('httplib2.Http.request', side_effect=queue_with((
         (fake_httplib2_response(code, **headers), body) for code in codes)))
 
-# fake_requests_response, mock_get_responses and mock_put_responses
-# mock calls to requests.get() and requests.put()
-def fake_requests_response(code, body, **headers):
-    r = requests.Response()
-    r.status_code = code
-    r.reason = httplib.responses.get(code, "Unknown Response")
-    r.headers = headers
-    r.raw = io.BytesIO(body)
-    return r
-
-# The following methods patch requests.Session(), where return_value is a mock
-# Session object.  The put/get attributes are set on mock Session, and the
-# desired put/get behavior is set on the put/get mocks.
-
-def mock_put_responses(body, *codes, **headers):
-    m = mock.MagicMock()
+
+class FakeCurl:
+    @classmethod
+    def make(cls, code, body='', headers={}):
+        return mock.Mock(spec=cls, wraps=cls(code, body, headers))
+
+    def __init__(self, code=200, body='', headers={}):
+        self._opt = {}
+        self._got_url = None
+        self._writer = None
+        self._headerfunction = None
+        self._resp_code = code
+        self._resp_body = body
+        self._resp_headers = headers
+
+    def getopt(self, opt):
+        return self._opt.get(str(opt), None)
+
+    def setopt(self, opt, val):
+        self._opt[str(opt)] = val
+        if opt == pycurl.WRITEFUNCTION:
+            self._writer = val
+        elif opt == pycurl.HEADERFUNCTION:
+            self._headerfunction = val
+
+    def perform(self):
+        if not isinstance(self._resp_code, int):
+            raise self._resp_code
+        if self.getopt(pycurl.URL) is None:
+            raise ValueError
+        if self._writer is None:
+            raise ValueError
+        if self._headerfunction:
+            self._headerfunction("HTTP/1.1 {} Status".format(self._resp_code))
+            for k, v in self._resp_headers.iteritems():
+                self._headerfunction(k + ': ' + str(v))
+        self._writer(self._resp_body)
+
+    def close(self):
+        pass
+
+    def reset(self):
+        """Prevent fake UAs from going back into the user agent pool."""
+        raise Exception
+
+    def getinfo(self, opt):
+        if opt == pycurl.RESPONSE_CODE:
+            return self._resp_code
+        raise Exception
+
+def mock_keep_responses(body, *codes, **headers):
+    """Patch pycurl to return fake responses and raise exceptions.
+
+    body can be a string to return as the response body; an exception
+    to raise when perform() is called; or an iterable that returns a
+    sequence of such values.
+    """
+    cm = mock.MagicMock()
     if isinstance(body, tuple):
         codes = list(codes)
         codes.insert(0, body)
-        m.return_value.put.side_effect = queue_with((fake_requests_response(code, b, **headers) for b, code in codes))
+        responses = [
+            FakeCurl.make(code=code, body=b, headers=headers)
+            for b, code in codes
+        ]
     else:
-        m.return_value.put.side_effect = queue_with((fake_requests_response(code, body, **headers) for code in codes))
-    return mock.patch('requests.Session', m)
-
-def mock_get_responses(body, *codes, **headers):
-    m = mock.MagicMock()
-    m.return_value.get.side_effect = queue_with((fake_requests_response(code, body, **headers) for code in codes))
-    return mock.patch('requests.Session', m)
-
-def mock_get(side_effect):
-    m = mock.MagicMock()
-    m.return_value.get.side_effect = side_effect
-    return mock.patch('requests.Session', m)
+        responses = [
+            FakeCurl.make(code=code, body=body, headers=headers)
+            for code in codes
+        ]
+    cm.side_effect = queue_with(responses)
+    cm.responses = responses
+    return mock.patch('pycurl.Curl', cm)
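In a test, this patches every pycurl.Curl() the SDK constructs. A sketch of simulating one transient failure followed by success (the mocked API client is assumed to be set up elsewhere, e.g. via ApiClientMock.mock_keep_services):

    import hashlib
    import arvados

    data = 'foo'
    loc = '{}+{}'.format(hashlib.md5(data).hexdigest(), len(data))
    with mock_keep_responses(data, 500, 200):
        client = arvados.KeepClient(api_client=api)  # api: a mocked API client
        # The first service attempt sees the 500 and, assuming 500 is
        # retryable, tries again; the retry reads 'foo', whose md5 matches
        # the locator, so get() returns it.
        assert client.get(loc, num_retries=1) == data
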
 
-def mock_put(side_effect):
-    m = mock.MagicMock()
-    m.return_value.put.side_effect = side_effect
-    return mock.patch('requests.Session', m)
 
 class MockStreamReader(object):
     def __init__(self, name='.', *data):
@@ -104,7 +140,9 @@ class ApiClientMock(object):
                            service_type='disk',
                            service_host=None,
                            service_port=None,
-                           service_ssl_flag=False):
+                           service_ssl_flag=False,
+                           additional_services=[],
+                           read_only=False):
         if api_mock is None:
             api_mock = self.api_client_mock()
         body = {
@@ -116,7 +154,8 @@ class ApiClientMock(object):
                 'service_port': service_port or 65535-i,
                 'service_ssl_flag': service_ssl_flag,
                 'service_type': service_type,
-            } for i in range(0, count)]
+                'read_only': read_only,
+            } for i in range(0, count)] + additional_services
         }
         self._mock_api_call(api_mock.keep_services().accessible, status, body)
         return api_mock
diff --git a/sdk/python/tests/keepstub.py b/sdk/python/tests/keepstub.py
new file mode 100644 (file)
index 0000000..ef724ed
--- /dev/null
@@ -0,0 +1,104 @@
+import BaseHTTPServer
+import hashlib
+import os
+import re
+import SocketServer
+import time
+
+class Server(SocketServer.ThreadingMixIn, BaseHTTPServer.HTTPServer, object):
+
+    allow_reuse_address = 1
+
+    def __init__(self, *args, **kwargs):
+        self.store = {}
+        self.delays = {
+            # before reading request headers
+            'request': 0,
+            # before reading request body
+            'request_body': 0,
+            # before setting response status and headers
+            'response': 0,
+            # before sending response body
+            'response_body': 0,
+            # before returning from handler (thus setting response EOF)
+            'response_close': 0,
+        }
+        super(Server, self).__init__(*args, **kwargs)
+
+    def setdelays(self, **kwargs):
+        """In future requests, induce delays at the given checkpoints."""
+        for (k, v) in kwargs.iteritems():
+            self.delays[k] # KeyError if unknown key
+            self.delays[k] = v
+
+    def _sleep_at_least(self, seconds):
+        """Sleep for given time, even if signals are received."""
+        wake = time.time() + seconds
+        todo = seconds
+        while todo > 0:
+            time.sleep(todo)
+            todo = wake - time.time()
+
+    def _do_delay(self, k):
+        self._sleep_at_least(self.delays[k])
+
+
+class Handler(BaseHTTPServer.BaseHTTPRequestHandler, object):
+    def handle(self, *args, **kwargs):
+        self.server._do_delay('request')
+        return super(Handler, self).handle(*args, **kwargs)
+
+    def do_GET(self):
+        self.server._do_delay('response')
+        r = re.search(r'[0-9a-f]{32}', self.path)
+        if not r:
+            return self.send_response(422)
+        datahash = r.group(0)
+        if datahash not in self.server.store:
+            return self.send_response(404)
+        self.send_response(200)
+        self.send_header('Content-type', 'application/octet-stream')
+        self.end_headers()
+        self.server._do_delay('response_body')
+        self.wfile.write(self.server.store[datahash])
+        self.server._do_delay('response_close')
+
+    def do_PUT(self):
+        self.server._do_delay('request_body')
+
+        # The comments at https://bugs.python.org/issue1491 imply that Python
+        # 2.7's BaseHTTPRequestHandler was patched to support 100 Continue, but
+        # the code that actually ships in Debian clearly was not, so we need to
+        # send the response on the socket directly.
+
+        self.wfile.write("%s %d %s\r\n\r\n" %
+                         (self.protocol_version, 100, "Continue"))
+
+        data = self.rfile.read(int(self.headers.getheader('content-length')))
+        datahash = hashlib.md5(data).hexdigest()
+        self.server.store[datahash] = data
+        self.server._do_delay('response')
+        self.send_response(200)
+        self.send_header('Content-type', 'text/plain')
+        self.end_headers()
+        self.server._do_delay('response_body')
+        self.wfile.write(datahash + '+' + str(len(data)))
+        self.server._do_delay('response_close')
+
+    def log_request(self, *args, **kwargs):
+        if os.environ.get('ARVADOS_DEBUG', None):
+            super(Handler, self).log_request(*args, **kwargs)
+
+    def finish(self, *args, **kwargs):
+        """Ignore exceptions, notably "Broken pipe" when client times out."""
+        try:
+            return super(Handler, self).finish(*args, **kwargs)
+        except:
+            pass
+
+    def handle_one_request(self, *args, **kwargs):
+        """Ignore exceptions, notably "Broken pipe" when client times out."""
+        try:
+            return super(Handler, self).handle_one_request(*args, **kwargs)
+        except:
+            pass
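The stub is meant to run in-process during tests: bind it to an ephemeral port, serve it from a background thread, and use setdelays() to provoke the client's timeout handling. A sketch (thread and port handling illustrative):

    import threading
    import keepstub

    server = keepstub.Server(('localhost', 0), keepstub.Handler)
    threading.Thread(target=server.serve_forever).start()
    port = server.server_address[1]

    server.setdelays(response_body=0.5)  # stall half a second before each body
    # ... point a KeepClient at http://localhost:<port>/ and run the test ...
    server.shutdown()
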
diff --git a/sdk/python/tests/manifest_examples.py b/sdk/python/tests/manifest_examples.py
new file mode 100644 (file)
index 0000000..2d8e475
--- /dev/null
@@ -0,0 +1,21 @@
+import arvados
+import arvados_testutil as tutil
+import hashlib
+
+class ManifestExamples(object):
+    def make_manifest(self,
+                      bytes_per_block=1,
+                      blocks_per_file=1,
+                      files_per_stream=1,
+                      streams=1):
+        datablip = 'x' * bytes_per_block
+        data_loc = '{}+{}'.format(hashlib.md5(datablip).hexdigest(),
+                                  bytes_per_block)
+        with tutil.mock_keep_responses(data_loc, 200):
+            coll = arvados.CollectionWriter()
+            for si in range(0, streams):
+                for fi in range(0, files_per_stream):
+                    with coll.open("stream{}/file{}.txt".format(si, fi)) as f:
+                        for bi in range(0, blocks_per_file):
+                            f.write(datablip)
+            return coll.manifest_text()
diff --git a/sdk/python/tests/nginx.conf b/sdk/python/tests/nginx.conf
new file mode 100644 (file)
index 0000000..6196605
--- /dev/null
@@ -0,0 +1,31 @@
+daemon off;
+error_log stderr info;          # Yes, must be specified here _and_ cmdline
+events {
+}
+http {
+  access_log /dev/stderr combined;
+  upstream arv-git-http {
+    server localhost:{{GITPORT}};
+  }
+  server {
+    listen *:{{GITSSLPORT}} ssl default_server;
+    server_name _;
+    ssl_certificate {{SSLCERT}};
+    ssl_certificate_key {{SSLKEY}};
+    location  / {
+      proxy_pass http://arv-git-http;
+    }
+  }
+  upstream keepproxy {
+    server localhost:{{KEEPPROXYPORT}};
+  }
+  server {
+    listen *:{{KEEPPROXYSSLPORT}} ssl default_server;
+    server_name _;
+    ssl_certificate {{SSLCERT}};
+    ssl_certificate_key {{SSLKEY}};
+    location  / {
+      proxy_pass http://keepproxy;
+    }
+  }
+}
diff --git a/sdk/python/tests/performance/__init__.py b/sdk/python/tests/performance/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/sdk/python/tests/performance/performance_profiler.py b/sdk/python/tests/performance/performance_profiler.py
new file mode 100644 (file)
index 0000000..afa53ae
--- /dev/null
@@ -0,0 +1,49 @@
+# Use the "profiled" decorator on a test to get profiling data.
+#
+# Usage:
+#   from performance_profiler import profiled
+#
+#   # See report in tmp/profile/foobar
+#   @profiled
+#   def foobar():
+#       baz = 1
+#
+#   See "test_a_sample.py" for a working example.
+#
+# Performance tests run as part of the regular test suite.
+# You can also run only the performance tests using one of the following:
+#     python -m unittest discover tests.performance
+#     ./run-tests.sh WORKSPACE=~/arvados --only sdk/python sdk/python_test="--test-suite=tests.performance"
+
+import functools
+import os
+import pstats
+import sys
+import unittest
+try:
+    import cProfile as profile
+except ImportError:
+    import profile
+
+output_dir = os.path.abspath(os.path.join('tmp', 'profile'))
+if not os.path.exists(output_dir):
+    os.makedirs(output_dir)
+
+def profiled(function):
+    @functools.wraps(function)
+    def profiled_function(*args, **kwargs):
+        outfile = open(os.path.join(output_dir, function.__name__), "w")
+        caught = None
+        pr = profile.Profile()
+        pr.enable()
+        try:
+            ret = function(*args, **kwargs)
+        except Exception as e:
+            caught = e
+        pr.disable()
+        ps = pstats.Stats(pr, stream=outfile)
+        ps.sort_stats('time').print_stats()
+        if caught:
+            raise
+        return ret
+    return profiled_function
diff --git a/sdk/python/tests/performance/test_a_sample.py b/sdk/python/tests/performance/test_a_sample.py
new file mode 100644 (file)
index 0000000..dff0984
--- /dev/null
@@ -0,0 +1,15 @@
+import unittest
+
+from performance_profiler import profiled
+
+class PerformanceTestSample(unittest.TestCase):
+    def foo(self):
+        bar = 64
+
+    @profiled
+    def test_profiled_decorator(self):
+        j = 0
+        for i in range(0,2**20):
+            j += i
+        self.foo()
+        print 'Hello'
index b9502f0f8e5bb5a6292da85bb69ef1f783163cab..1c5162b97d87e476b9ff3badc82ffce3afdd4d28 100644 (file)
@@ -1,5 +1,6 @@
 #!/usr/bin/env python
 
+from __future__ import print_function
 import argparse
 import atexit
 import httplib2
@@ -41,6 +42,7 @@ if not os.path.exists(TEST_TMPDIR):
     os.mkdir(TEST_TMPDIR)
 
 my_api_host = None
+_cached_config = {}
 
 def find_server_pid(PID_PATH, wait=10):
     now = time.time()
@@ -113,6 +115,33 @@ def find_available_port():
     sock.close()
     return port
 
+def _wait_until_port_listens(port, timeout=10):
+    """Wait for a process to start listening on the given port.
+
+    If nothing listens on the port within the specified timeout (given
+    in seconds), print a warning on stderr before returning.
+    """
+    try:
+        subprocess.check_output(['which', 'lsof'])
+    except subprocess.CalledProcessError:
+        print("WARNING: No `lsof` -- cannot wait for port to listen. "+
+              "Sleeping 0.5 and hoping for the best.")
+        time.sleep(0.5)
+        return
+    deadline = time.time() + timeout
+    while time.time() < deadline:
+        try:
+            subprocess.check_output(
+                ['lsof', '-t', '-i', 'tcp:'+str(port)])
+        except subprocess.CalledProcessError:
+            time.sleep(0.1)
+            continue
+        return
+    print(
+        "WARNING: Nothing is listening on port {} (waited {} seconds).".
+        format(port, timeout),
+        file=sys.stderr)
+
 def run(leave_running_atexit=False):
     """Ensure an API server is running, and ARVADOS_API_* env vars have
     admin credentials for it.
@@ -163,6 +192,15 @@ def run(leave_running_atexit=False):
     # died, or we have lost our credentials, or something else is
     # preventing us from calling reset(). Start a new one.
 
+    if not os.path.exists('tmp'):
+        os.makedirs('tmp')
+
+    if not os.path.exists('tmp/api'):
+        os.makedirs('tmp/api')
+
+    if not os.path.exists('tmp/logs'):
+        os.makedirs('tmp/logs')
+
     if not os.path.exists('tmp/self-signed.pem'):
         # We assume here that either passenger reports its listening
         # address as https:/0.0.0.0:port/. If it reports "127.0.0.1"
@@ -178,6 +216,13 @@ def run(leave_running_atexit=False):
             '-subj', '/CN=0.0.0.0'],
         stdout=sys.stderr)
 
+    # Install the git repository fixtures.
+    gitdir = os.path.join(SERVICES_SRC_DIR, 'api', 'tmp', 'git')
+    gittarball = os.path.join(SERVICES_SRC_DIR, 'api', 'test', 'test.git.tar')
+    if not os.path.isdir(gitdir):
+        os.makedirs(gitdir)
+    subprocess.check_output(['tar', '-xC', gitdir, '-f', gittarball])
+
     port = find_available_port()
     env = os.environ.copy()
     env['RAILS_ENV'] = 'test'
@@ -206,8 +251,10 @@ def run(leave_running_atexit=False):
     my_api_host = match.group(1)
     os.environ['ARVADOS_API_HOST'] = my_api_host
 
-    # Make sure the server has written its pid file before continuing
+    # Make sure the server has written its pid file and started
+    # listening on its TCP port
     find_server_pid(pid_file)
+    _wait_until_port_listens(port)
 
     reset()
     os.chdir(restore_cwd)
@@ -256,20 +303,23 @@ def _start_keep(n, keep_args):
     keep0 = tempfile.mkdtemp()
     port = find_available_port()
     keep_cmd = ["keepstore",
-                "-volumes={}".format(keep0),
+                "-volume={}".format(keep0),
                 "-listen=:{}".format(port),
-                "-pid={}".format("{}/keep{}.pid".format(TEST_TMPDIR, n))]
+                "-pid="+_pidfile('keep{}'.format(n))]
 
     for arg, val in keep_args.iteritems():
         keep_cmd.append("{}={}".format(arg, val))
 
-    kp0 = subprocess.Popen(keep_cmd)
-    with open("{}/keep{}.pid".format(TEST_TMPDIR, n), 'w') as f:
+    kp0 = subprocess.Popen(
+        keep_cmd, stdin=open('/dev/null'), stdout=sys.stderr)
+    with open(_pidfile('keep{}'.format(n)), 'w') as f:
         f.write(str(kp0.pid))
 
     with open("{}/keep{}.volume".format(TEST_TMPDIR, n), 'w') as f:
         f.write(keep0)
 
+    _wait_until_port_listens(port)
+
     return port
 
 def run_keep(blob_signing_key=None, enforce_permissions=False):
@@ -307,7 +357,7 @@ def run_keep(blob_signing_key=None, enforce_permissions=False):
         }).execute()
 
 def _stop_keep(n):
-    kill_server_pid("{}/keep{}.pid".format(TEST_TMPDIR, n), 0)
+    kill_server_pid(_pidfile('keep{}'.format(n)), 0)
     if os.path.exists("{}/keep{}.volume".format(TEST_TMPDIR, n)):
         with open("{}/keep{}.volume".format(TEST_TMPDIR, n), 'r') as r:
             shutil.rmtree(r.read(), True)
@@ -320,6 +370,8 @@ def stop_keep():
     _stop_keep(1)
 
 def run_keep_proxy():
+    if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
+        return
     stop_keep_proxy()
 
     admin_token = auth_token('admin')
@@ -328,9 +380,9 @@ def run_keep_proxy():
     env['ARVADOS_API_TOKEN'] = admin_token
     kp = subprocess.Popen(
         ['keepproxy',
-         '-pid={}/keepproxy.pid'.format(TEST_TMPDIR),
+         '-pid='+_pidfile('keepproxy'),
          '-listen=:{}'.format(port)],
-        env=env)
+        env=env, stdin=open('/dev/null'), stdout=sys.stderr)
 
     api = arvados.api(
         version='v1',
@@ -347,9 +399,105 @@ def run_keep_proxy():
         'service_ssl_flag': False,
     }}).execute()
     os.environ["ARVADOS_KEEP_PROXY"] = "http://localhost:{}".format(port)
+    _setport('keepproxy', port)
+    _wait_until_port_listens(port)
 
 def stop_keep_proxy():
-    kill_server_pid(os.path.join(TEST_TMPDIR, "keepproxy.pid"), 0)
+    if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
+        return
+    kill_server_pid(_pidfile('keepproxy'), wait=0)
+
+def run_arv_git_httpd():
+    if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
+        return
+    stop_arv_git_httpd()
+
+    gitdir = os.path.join(SERVICES_SRC_DIR, 'api', 'tmp', 'git')
+    gitport = find_available_port()
+    env = os.environ.copy()
+    env.pop('ARVADOS_API_TOKEN', None)
+    agh = subprocess.Popen(
+        ['arv-git-httpd',
+         '-repo-root='+gitdir+'/test',
+         '-address=:'+str(gitport)],
+        env=env, stdin=open('/dev/null'), stdout=sys.stderr)
+    with open(_pidfile('arv-git-httpd'), 'w') as f:
+        f.write(str(agh.pid))
+    _setport('arv-git-httpd', gitport)
+    _wait_until_port_listens(gitport)
+
+def stop_arv_git_httpd():
+    if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
+        return
+    kill_server_pid(_pidfile('arv-git-httpd'), wait=0)
+
+def run_nginx():
+    if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
+        return
+    nginxconf = {}
+    nginxconf['KEEPPROXYPORT'] = _getport('keepproxy')
+    nginxconf['KEEPPROXYSSLPORT'] = find_available_port()
+    nginxconf['GITPORT'] = _getport('arv-git-httpd')
+    nginxconf['GITSSLPORT'] = find_available_port()
+    nginxconf['SSLCERT'] = os.path.join(SERVICES_SRC_DIR, 'api', 'tmp', 'self-signed.pem')
+    nginxconf['SSLKEY'] = os.path.join(SERVICES_SRC_DIR, 'api', 'tmp', 'self-signed.key')
+
+    conftemplatefile = os.path.join(MY_DIRNAME, 'nginx.conf')
+    conffile = os.path.join(TEST_TMPDIR, 'nginx.conf')
+    with open(conffile, 'w') as f:
+        f.write(re.sub(
+            r'{{([A-Z]+)}}',
+            lambda match: str(nginxconf.get(match.group(1))),
+            open(conftemplatefile).read()))
+
+    env = os.environ.copy()
+    env['PATH'] = env['PATH']+':/sbin:/usr/sbin:/usr/local/sbin'
+    nginx = subprocess.Popen(
+        ['nginx',
+         '-g', 'error_log stderr info;',
+         '-g', 'pid '+_pidfile('nginx')+';',
+         '-c', conffile],
+        env=env, stdin=open('/dev/null'), stdout=sys.stderr)
+    _setport('keepproxy-ssl', nginxconf['KEEPPROXYSSLPORT'])
+    _setport('arv-git-httpd-ssl', nginxconf['GITSSLPORT'])
+
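
run_nginx() renders its config by substituting {{NAME}} placeholders in the
nginx.conf template from the nginxconf dict. A standalone illustration of
that re.sub call, using a toy template string and made-up port values:

    import re

    nginxconf = {'KEEPPROXYPORT': 25107, 'GITPORT': 9001}  # toy values
    template = 'proxy_pass http://localhost:{{KEEPPROXYPORT}};'
    rendered = re.sub(
        r'{{([A-Z]+)}}',
        lambda match: str(nginxconf.get(match.group(1))),
        template)
    # rendered == 'proxy_pass http://localhost:25107;'

Note that a placeholder missing from the dict renders as the string 'None',
since dict.get() returns None.
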
+def stop_nginx():
+    if 'ARVADOS_TEST_PROXY_SERVICES' in os.environ:
+        return
+    kill_server_pid(_pidfile('nginx'), wait=0)
+
+def _pidfile(program):
+    return os.path.join(TEST_TMPDIR, program + '.pid')
+
+def _portfile(program):
+    return os.path.join(TEST_TMPDIR, program + '.port')
+
+def _setport(program, port):
+    with open(_portfile(program), 'w') as f:
+        f.write(str(port))
+
+# Returns the port recorded by _setport(), or 9 if the program is not up.
+def _getport(program):
+    try:
+        return int(open(_portfile(program)).read())
+    except IOError:
+        return 9
+
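
The _pidfile/_portfile helpers give each daemon a well-known file under
TEST_TMPDIR, so separate "start_*" and "stop_*" invocations of this script
can find each other's state. A minimal sketch of the round trip, with a
hypothetical port value:

    _setport('keepproxy', 25107)
    assert _getport('keepproxy') == 25107
    assert _pidfile('keepproxy') == os.path.join(TEST_TMPDIR, 'keepproxy.pid')
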
+def _apiconfig(key):
+    global _cached_config
+    if _cached_config:
+        return _cached_config[key]
+    def _load(f, required=True):
+        fullpath = os.path.join(SERVICES_SRC_DIR, 'api', 'config', f)
+        if not required and not os.path.exists(fullpath):
+            return {}
+        with open(fullpath) as fp:
+            return yaml.load(fp)
+    cdefault = _load('application.default.yml')
+    csite = _load('application.yml', required=False)
+    _cached_config = {}
+    for section in [cdefault.get('common',{}), cdefault.get('test',{}),
+                    csite.get('common',{}), csite.get('test',{})]:
+        _cached_config.update(section)
+    return _cached_config[key]
 
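
_apiconfig merges four sections in order, so later sections win: the
defaults' "common" and "test" sections, then the site file's "common" and
"test" sections. A worked example of that precedence with toy dicts standing
in for the YAML files:

    cdefault = {'common': {'a': 1, 'b': 1}, 'test': {'b': 2}}
    csite = {'common': {'a': 3}, 'test': {'c': 4}}
    merged = {}
    for section in [cdefault.get('common',{}), cdefault.get('test',{}),
                    csite.get('common',{}), csite.get('test',{})]:
        merged.update(section)
    # merged == {'a': 3, 'b': 2, 'c': 4}
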
 def fixture(fix):
     '''load a fixture yaml file'''
@@ -431,14 +579,21 @@ class TestCaseWithServers(unittest.TestCase):
 
 
 if __name__ == "__main__":
-    actions = ['start', 'stop',
-               'start_keep', 'stop_keep',
-               'start_keep_proxy', 'stop_keep_proxy']
+    actions = [
+        'start', 'stop',
+        'start_keep', 'stop_keep',
+        'start_keep_proxy', 'stop_keep_proxy',
+        'start_arv-git-httpd', 'stop_arv-git-httpd',
+        'start_nginx', 'stop_nginx',
+    ]
     parser = argparse.ArgumentParser()
     parser.add_argument('action', type=str, help="one of {}".format(actions))
     parser.add_argument('--auth', type=str, metavar='FIXTURE_NAME', help='Print authorization info for given api_client_authorizations fixture')
     args = parser.parse_args()
 
+    if args.action not in actions:
+        print("Unrecognized action '{}'. Actions are: {}.".format(args.action, actions), file=sys.stderr)
+        sys.exit(1)
     if args.action == 'start':
         stop(force=('ARVADOS_TEST_API_HOST' not in os.environ))
         run(leave_running_atexit=True)
@@ -460,5 +615,13 @@ if __name__ == "__main__":
         run_keep_proxy()
     elif args.action == 'stop_keep_proxy':
         stop_keep_proxy()
+    elif args.action == 'start_arv-git-httpd':
+        run_arv_git_httpd()
+    elif args.action == 'stop_arv-git-httpd':
+        stop_arv_git_httpd()
+    elif args.action == 'start_nginx':
+        run_nginx()
+    elif args.action == 'stop_nginx':
+        stop_nginx()
     else:
-        print("Unrecognized action '{}'. Actions are: {}.".format(args.action, actions))
+        raise Exception("action recognized but not implemented!?")
index faaaac307cf893398875d0aa5296e3fa709dd5fb..9d438e2e038ecca70225009d261e5229d61a81c3 100644
@@ -1,14 +1,17 @@
 #!/usr/bin/env python
 
 import arvados
+import collections
 import httplib2
 import json
 import mimetypes
 import os
 import run_test_server
+import string
 import unittest
 from apiclient import errors as apiclient_errors
 from apiclient import http as apiclient_http
+from arvados.api import OrderedJsonModel
 
 from arvados_testutil import fake_httplib2_response
 
@@ -107,6 +110,21 @@ class ArvadosApiClientTest(unittest.TestCase):
             text = "X" * maxsize
             arvados.api('v1').collections().create(body={"manifest_text": text}).execute()
 
+    def test_ordered_json_model(self):
+        mock_responses = {
+            'arvados.humans.get': (None, json.dumps(collections.OrderedDict(
+                        (c, int(c, 16)) for c in string.hexdigits))),
+            }
+        req_builder = apiclient_http.RequestMockBuilder(mock_responses)
+        api = arvados.api('v1',
+                          host=os.environ['ARVADOS_API_HOST'],
+                          token='discovery-doc-only-no-token-needed',
+                          insecure=True,
+                          requestBuilder=req_builder,
+                          model=OrderedJsonModel())
+        result = api.humans().get(uuid='test').execute()
+        self.assertEqual(string.hexdigits, ''.join(result.keys()))
+
 
 if __name__ == '__main__':
     unittest.main()
index d078268c83ae9a707b0dc0330338fb795d2b65c7..f4bd8b692bc2520cdda8a679abbb3c2d18ff85dd 100644
@@ -381,6 +381,20 @@ class ArvadosPutTest(run_test_server.TestCaseWithServers, ArvadosBaseTestCase):
             arv_put.ResumeCache.CACHE_DIR = orig_cachedir
             os.chmod(cachedir, 0o700)
 
+    def test_normalize(self):
+        testfile1 = self.make_test_file()
+        testfile2 = self.make_test_file()
+        test_paths = [testfile1.name, testfile2.name]
+        # Reverse-sort the paths, so normalization must change their order.
+        test_paths.sort(reverse=True)
+        self.call_main_with_args(['--stream', '--no-progress', '--normalize'] +
+                                 test_paths)
+        manifest = self.main_stdout.getvalue()
+        # Assert the second file we specified appears first in the manifest.
+        file_indices = [manifest.find(':' + os.path.basename(path))
+                        for path in test_paths]
+        self.assertGreater(*file_indices)
+
     def test_error_name_without_collection(self):
         self.assertRaises(SystemExit, self.call_main_with_args,
                           ['--name', 'test without Collection',
@@ -400,7 +414,7 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers,
                             ArvadosBaseTestCase):
     def _getKeepServerConfig():
         for config_file, mandatory in [
-                ['application.yml', True], ['application.default.yml', False]]:
+                ['application.yml', False], ['application.default.yml', True]]:
             path = os.path.join(run_test_server.SERVICES_SRC_DIR,
                                 "api", "config", config_file)
             if not mandatory and not os.path.exists(path):
diff --git a/sdk/python/tests/test_arv_ws.py b/sdk/python/tests/test_arv_ws.py
new file mode 100644
index 0000000..5a01827
--- /dev/null
@@ -0,0 +1,13 @@
+#!/usr/bin/env python
+
+import unittest
+import arvados.errors as arv_error
+import arvados.commands.ws as arv_ws
+
+class ArvWsTestCase(unittest.TestCase):
+    def run_ws(self, args):
+        return arv_ws.main(args)
+
+    def test_unsupported_arg(self):
+        with self.assertRaises(SystemExit):
+            self.run_ws(['-x=unknown'])
index 825465cb4aae15e05876217c9fcbb74db3dce0c4..99be4c2e401d0a34ad5b0b4b62fb0074899de2ed 100644
@@ -77,11 +77,11 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
             writer.seek(0, os.SEEK_SET)
             self.assertEqual("01234567", writer.read(12))
 
-            self.assertEqual(None, c.manifest_locator())
-            self.assertEqual(True, c.modified())
+            self.assertIsNone(c.manifest_locator())
+            self.assertTrue(c.modified())
             c.save_new("test_truncate")
             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
-            self.assertEqual(False, c.modified())
+            self.assertFalse(c.modified())
 
     def test_write_to_end(self):
         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
@@ -104,13 +104,13 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
             writer.seek(5, os.SEEK_SET)
             self.assertEqual("56789foo", writer.read(8))
 
-            self.assertEqual(None, c.manifest_locator())
-            self.assertEqual(True, c.modified())
-            self.assertEqual(None, keep.get("acbd18db4cc2f85cedef654fccc4a4d8+3"))
+            self.assertIsNone(c.manifest_locator())
+            self.assertTrue(c.modified())
+            self.assertIsNone(keep.get("acbd18db4cc2f85cedef654fccc4a4d8+3"))
 
             c.save_new("test_append")
             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
-            self.assertEqual(False, c.modified())
+            self.assertFalse(c.modified())
             self.assertEqual("foo", keep.get("acbd18db4cc2f85cedef654fccc4a4d8+3"))
 
 
@@ -128,7 +128,7 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
         writer.write("world")
         self.assertEqual(writer.read(20), "0123456789helloworld")
 
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 fc5e038d38a57032085441e7fe7010b0+10 0:20:count.txt\n", c.manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 fc5e038d38a57032085441e7fe7010b0+10 0:20:count.txt\n", c.portable_manifest_text())
 
     def test_write_at_beginning(self):
         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
@@ -140,14 +140,36 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
             writer.write("foo")
             self.assertEqual(writer.size(), 10)
             self.assertEqual("foo3456789", writer.readfrom(0, 13))
-            self.assertEqual(". acbd18db4cc2f85cedef654fccc4a4d8+3 781e5e245d69b566979b86e28d23f2c7+10 0:3:count.txt 6:7:count.txt\n", c.manifest_text())
+            self.assertEqual(". acbd18db4cc2f85cedef654fccc4a4d8+3 781e5e245d69b566979b86e28d23f2c7+10 0:3:count.txt 6:7:count.txt\n", c.portable_manifest_text())
 
     def test_write_empty(self):
         keep = ArvadosFileWriterTestCase.MockKeep({})
         with Collection(keep_client=keep) as c:
             writer = c.open("count.txt", "w")
             self.assertEqual(writer.size(), 0)
-            self.assertEqual(". d41d8cd98f00b204e9800998ecf8427e+0 0:0:count.txt\n", c.manifest_text())
+            self.assertEqual(". d41d8cd98f00b204e9800998ecf8427e+0 0:0:count.txt\n", c.portable_manifest_text())
+
+    def test_save_manifest_text(self):
+        keep = ArvadosFileWriterTestCase.MockKeep({})
+        with Collection(keep_client=keep) as c:
+            writer = c.open("count.txt", "w")
+            writer.write("0123456789")
+            self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.portable_manifest_text())
+            self.assertNotIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
+
+            self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.save_new(create_collection_record=False))
+            self.assertIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
+
+    def test_get_manifest_text_commits(self):
+        keep = ArvadosFileWriterTestCase.MockKeep({})
+        with Collection(keep_client=keep) as c:
+            writer = c.open("count.txt", "w")
+            writer.write("0123456789")
+            self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.portable_manifest_text())
+            self.assertNotIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
+            self.assertEqual('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n', c.manifest_text())
+            self.assertIn('781e5e245d69b566979b86e28d23f2c7+10', keep.blocks)
+
 
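The two tests above pin down the split this branch makes between the two
manifest renderers: portable_manifest_text() formats the manifest without
side effects, while manifest_text() (and save_new()) first commit any
buffered blocks to Keep. A minimal sketch of how a caller sees that split,
assuming the Collection API as exercised in these tests:

    c = Collection()
    f = c.open("count.txt", "w")
    f.write("0123456789")
    preview = c.portable_manifest_text()  # renders only; no Keep writes yet
    final = c.manifest_text()             # also flushes buffered blocks to Keep
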
     def test_write_in_middle(self):
         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
@@ -159,7 +181,7 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
             writer.write("foo")
             self.assertEqual(writer.size(), 10)
             self.assertEqual("012foo6789", writer.readfrom(0, 13))
-            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:count.txt 10:3:count.txt 6:4:count.txt\n", c.manifest_text())
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:count.txt 10:3:count.txt 6:4:count.txt\n", c.portable_manifest_text())
 
     def test_write_at_end(self):
         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
@@ -171,7 +193,7 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
             writer.write("foo")
             self.assertEqual(writer.size(), 10)
             self.assertEqual("0123456foo", writer.readfrom(0, 13))
-            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:7:count.txt 10:3:count.txt\n", c.manifest_text())
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 acbd18db4cc2f85cedef654fccc4a4d8+3 0:7:count.txt 10:3:count.txt\n", c.portable_manifest_text())
 
     def test_write_across_segment_boundary(self):
         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
@@ -183,7 +205,7 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
             writer.write("foobar")
             self.assertEqual(writer.size(), 20)
             self.assertEqual("0123456foobar34", writer.readfrom(0, 15))
-            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 3858f62230ac3c915f300c664312c63f+6 0:7:count.txt 10:6:count.txt 3:7:count.txt\n", c.manifest_text())
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 3858f62230ac3c915f300c664312c63f+6 0:7:count.txt 10:6:count.txt 3:7:count.txt\n", c.portable_manifest_text())
 
     def test_write_across_several_segments(self):
         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
@@ -195,7 +217,7 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
             writer.write("abcdefg")
             self.assertEqual(writer.size(), 12)
             self.assertEqual("01abcdefg123", writer.readfrom(0, 15))
-            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 7ac66c0f148de9519b8bd264312c4d64+7 0:2:count.txt 10:7:count.txt 1:3:count.txt\n", c.manifest_text())
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 7ac66c0f148de9519b8bd264312c4d64+7 0:2:count.txt 10:7:count.txt 1:3:count.txt\n", c.portable_manifest_text())
 
     def test_write_large(self):
         keep = ArvadosFileWriterTestCase.MockKeep({})
@@ -206,16 +228,33 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
         with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
                              api_client=api, keep_client=keep) as c:
             writer = c.open("count.txt", "r+")
-            text = ''.join(["0123456789" for a in xrange(0, 100)])
+            text = "0123456789" * 100
             for b in xrange(0, 100000):
                 writer.write(text)
             self.assertEqual(writer.size(), 100000000)
 
-            self.assertEqual(None, c.manifest_locator())
-            self.assertEqual(True, c.modified())
+            self.assertIsNone(c.manifest_locator())
+            self.assertTrue(c.modified())
             c.save_new("test_write_large")
             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
-            self.assertEqual(False, c.modified())
+            self.assertFalse(c.modified())
+
+
+    def test_large_write(self):
+        keep = ArvadosFileWriterTestCase.MockKeep({})
+        api = ArvadosFileWriterTestCase.MockApi({}, {})
+        with Collection('. ' + arvados.config.EMPTY_BLOCK_LOCATOR + ' 0:0:count.txt',
+                             api_client=api, keep_client=keep) as c:
+            writer = c.open("count.txt", "r+")
+            self.assertEqual(writer.size(), 0)
+
+            text = "0123456789"
+            writer.write(text)
+            text = "0123456789" * 9999999
+            writer.write(text)
+            self.assertEqual(writer.size(), 100000000)
+
+            self.assertEqual(c.manifest_text(), ". 781e5e245d69b566979b86e28d23f2c7+10 48dd23ea1645fd47d789804d71b5bb8e+67108864 77c57dc6ac5a10bb2205caaa73187994+32891126 0:100000000:count.txt\n")
 
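The expected manifest in test_large_write encodes how the writer packs a
large write into buffer blocks: after the initial 10-byte block, the
99,999,990-byte write is split into one full 64 MiB block plus a remainder.
The arithmetic, as a quick check:

    assert 67108864 == 64 * 2**20                 # one full 64 MiB block
    assert 67108864 + 32891126 == 100000000 - 10  # plus the remainder
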
     def test_rewrite_on_empty_file(self):
         keep = ArvadosFileWriterTestCase.MockKeep({})
@@ -228,11 +267,11 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
 
             self.assertEqual(writer.size(), 10)
             self.assertEqual("0123456789", writer.readfrom(0, 20))
-            self.assertEqual(". 7a08b07e84641703e5f2c836aa59a170+100 90:10:count.txt\n", c.manifest_text())
+            self.assertEqual(". 7a08b07e84641703e5f2c836aa59a170+100 90:10:count.txt\n", c.portable_manifest_text())
             writer.flush()
             self.assertEqual(writer.size(), 10)
             self.assertEqual("0123456789", writer.readfrom(0, 20))
-            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n", c.manifest_text())
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n", c.portable_manifest_text())
 
     def test_rewrite_append_existing_file(self):
         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
@@ -245,12 +284,12 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
 
             self.assertEqual(writer.size(), 20)
             self.assertEqual("0123456789abcdefghij", writer.readfrom(0, 20))
-            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 ae5f43bab79cf0be33f025fa97ae7398+100 0:10:count.txt 100:10:count.txt\n", c.manifest_text())
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 ae5f43bab79cf0be33f025fa97ae7398+100 0:10:count.txt 100:10:count.txt\n", c.portable_manifest_text())
 
             writer.arvadosfile.flush()
             self.assertEqual(writer.size(), 20)
             self.assertEqual("0123456789abcdefghij", writer.readfrom(0, 20))
-            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:20:count.txt\n", c.manifest_text())
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:20:count.txt\n", c.portable_manifest_text())
 
     def test_rewrite_over_existing_file(self):
         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
@@ -263,13 +302,13 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
 
             self.assertEqual(writer.size(), 15)
             self.assertEqual("01234abcdefghij", writer.readfrom(0, 20))
-            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 ae5f43bab79cf0be33f025fa97ae7398+100 0:5:count.txt 100:10:count.txt\n", c.manifest_text())
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 ae5f43bab79cf0be33f025fa97ae7398+100 0:5:count.txt 100:10:count.txt\n", c.portable_manifest_text())
 
             writer.arvadosfile.flush()
 
             self.assertEqual(writer.size(), 15)
             self.assertEqual("01234abcdefghij", writer.readfrom(0, 20))
-            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:5:count.txt 10:10:count.txt\n", c.manifest_text())
+            self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 a925576942e94b2ef57a066101b48876+10 0:5:count.txt 10:10:count.txt\n", c.portable_manifest_text())
 
     def test_write_large_rewrite(self):
         keep = ArvadosFileWriterTestCase.MockKeep({})
@@ -287,11 +326,11 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
             writer.write("foo")
             self.assertEqual(writer.size(), 100000000)
 
-            self.assertEqual(None, c.manifest_locator())
-            self.assertEqual(True, c.modified())
+            self.assertIsNone(c.manifest_locator())
+            self.assertTrue(c.modified())
             c.save_new("test_write_large")
             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
-            self.assertEqual(False, c.modified())
+            self.assertFalse(c.modified())
 
     def test_create(self):
         keep = ArvadosFileWriterTestCase.MockKeep({})
@@ -305,12 +344,12 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
             writer.write("01234567")
             self.assertEqual(writer.size(), 8)
 
-            self.assertEqual(None, c.manifest_locator())
-            self.assertEqual(True, c.modified())
-            self.assertEqual(None, keep.get("2e9ec317e197819358fbc43afca7d837+8"))
+            self.assertIsNone(c.manifest_locator())
+            self.assertTrue(c.modified())
+            self.assertIsNone(keep.get("2e9ec317e197819358fbc43afca7d837+8"))
             c.save_new("test_create")
             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
-            self.assertEqual(False, c.modified())
+            self.assertFalse(c.modified())
             self.assertEqual("01234567", keep.get("2e9ec317e197819358fbc43afca7d837+8"))
 
 
@@ -324,7 +363,9 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
             self.assertIsNone(c.api_response())
             writer = c.open("foo/bar/count.txt", "w+")
             writer.write("01234567")
+            self.assertFalse(c.committed())
             c.save_new("test_create")
+            self.assertTrue(c.committed())
             self.assertEqual(c.api_response(), api.response)
 
     def test_overwrite(self):
@@ -340,11 +381,11 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
             writer.write("01234567")
             self.assertEqual(writer.size(), 8)
 
-            self.assertEqual(None, c.manifest_locator())
-            self.assertEqual(True, c.modified())
+            self.assertIsNone(c.manifest_locator())
+            self.assertTrue(c.modified())
             c.save_new("test_overwrite")
             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
-            self.assertEqual(False, c.modified())
+            self.assertFalse(c.modified())
 
     def test_file_not_found(self):
         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n') as c:
@@ -370,12 +411,12 @@ class ArvadosFileWriterTestCase(unittest.TestCase):
             self.assertEqual(w1.size(), 8)
             self.assertEqual(w2.size(), 8)
 
-            self.assertEqual(None, c.manifest_locator())
-            self.assertEqual(True, c.modified())
-            self.assertEqual(None, keep.get("2e9ec317e197819358fbc43afca7d837+8"))
+            self.assertIsNone(c.manifest_locator())
+            self.assertTrue(c.modified())
+            self.assertIsNone(keep.get("2e9ec317e197819358fbc43afca7d837+8"))
             c.save_new("test_create_multiple")
             self.assertEqual("zzzzz-4zz18-mockcollection0", c.manifest_locator())
-            self.assertEqual(False, c.modified())
+            self.assertFalse(c.modified())
             self.assertEqual("01234567", keep.get("2e9ec317e197819358fbc43afca7d837+8"))
 
 
@@ -415,23 +456,28 @@ class ArvadosFileReaderTestCase(StreamFileReaderTestCase):
             blocks[loc] = d
             stream.append(Range(loc, n, len(d)))
             n += len(d)
-        af = ArvadosFile(ArvadosFileReaderTestCase.MockParent(blocks, nocache), stream=stream, segments=[Range(1, 0, 3), Range(6, 3, 3), Range(11, 6, 3)])
-        return ArvadosFileReader(af, "count.txt")
+        af = ArvadosFile(ArvadosFileReaderTestCase.MockParent(blocks, nocache), "count.txt", stream=stream, segments=[Range(1, 0, 3), Range(6, 3, 3), Range(11, 6, 3)])
+        return ArvadosFileReader(af)
 
-    def test_read_returns_first_block(self):
-        # read() calls will be aligned on block boundaries - see #3663.
+    def test_read_block_crossing_behavior(self):
+        # read() needs to return all the data requested if possible, even if it
+        # crosses uncached blocks: https://arvados.org/issues/5856
         sfile = self.make_count_reader(nocache=True)
-        self.assertEqual('123', sfile.read(10))
+        self.assertEqual('12345678', sfile.read(8))
 
     def test_successive_reads(self):
+        # Override StreamFileReaderTestCase.test_successive_reads
         sfile = self.make_count_reader(nocache=True)
-        for expect in ['123', '456', '789', '']:
-            self.assertEqual(expect, sfile.read(10))
+        self.assertEqual('1234', sfile.read(4))
+        self.assertEqual('5678', sfile.read(4))
+        self.assertEqual('9', sfile.read(4))
+        self.assertEqual('', sfile.read(4))
 
     def test_tell_after_block_read(self):
+        # Override StreamFileReaderTestCase.test_tell_after_block_read
         sfile = self.make_count_reader(nocache=True)
-        sfile.read(5)
-        self.assertEqual(3, sfile.tell())
+        self.assertEqual('12345678', sfile.read(8))
+        self.assertEqual(8, sfile.tell())
 
     def test_prefetch(self):
         keep = ArvadosFileWriterTestCase.MockKeep({"2e9ec317e197819358fbc43afca7d837+8": "01234567", "e8dc4081b13434b45189a720b77b6818+8": "abcdefgh"})
@@ -450,8 +496,8 @@ class ArvadosFileReaderTestCase(StreamFileReaderTestCase):
     def test__eq__from_writes(self):
         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt') as c1:
             with Collection() as c2:
-                with c2.open("count1.txt", "w") as f:
-                    f.write("0123456789")
+                f = c2.open("count1.txt", "w")
+                f.write("0123456789")
 
                 self.assertTrue(c1["count1.txt"] == c2["count1.txt"])
                 self.assertFalse(c1["count1.txt"] != c2["count1.txt"])
@@ -459,8 +505,8 @@ class ArvadosFileReaderTestCase(StreamFileReaderTestCase):
     def test__ne__(self):
         with Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt') as c1:
             with Collection() as c2:
-                with c2.open("count1.txt", "w") as f:
-                    f.write("1234567890")
+                f = c2.open("count1.txt", "w")
+                f.write("1234567890")
 
                 self.assertTrue(c1["count1.txt"] != c2["count1.txt"])
                 self.assertFalse(c1["count1.txt"] == c2["count1.txt"])
@@ -483,10 +529,10 @@ class ArvadosFileReadTestCase(unittest.TestCase, StreamRetryTestMixin):
         blockmanager = arvados.arvfile._BlockManager(self.keep_client())
         blockmanager.prefetch_enabled = False
         col = Collection(keep_client=self.keep_client(), block_manager=blockmanager)
-        af = ArvadosFile(col,
+        af = ArvadosFile(col, "test",
                          stream=stream,
                          segments=segments)
-        return ArvadosFileReader(af, "test", **kwargs)
+        return ArvadosFileReader(af, **kwargs)
 
     def read_for_test(self, reader, byte_count, **kwargs):
         return reader.read(byte_count, **kwargs)
@@ -514,75 +560,85 @@ class ArvadosFileReadlinesTestCase(ArvadosFileReadTestCase):
 class BlockManagerTest(unittest.TestCase):
     def test_bufferblock_append(self):
         keep = ArvadosFileWriterTestCase.MockKeep({})
-        blockmanager = arvados.arvfile._BlockManager(keep)
-        bufferblock = blockmanager.alloc_bufferblock()
-        bufferblock.append("foo")
+        with arvados.arvfile._BlockManager(keep) as blockmanager:
+            bufferblock = blockmanager.alloc_bufferblock()
+            bufferblock.append("foo")
 
-        self.assertEqual(bufferblock.size(), 3)
-        self.assertEqual(bufferblock.buffer_view[0:3], "foo")
-        self.assertEqual(bufferblock.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")
+            self.assertEqual(bufferblock.size(), 3)
+            self.assertEqual(bufferblock.buffer_view[0:3], "foo")
+            self.assertEqual(bufferblock.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")
 
-        bufferblock.append("bar")
+            bufferblock.append("bar")
 
-        self.assertEqual(bufferblock.size(), 6)
-        self.assertEqual(bufferblock.buffer_view[0:6], "foobar")
-        self.assertEqual(bufferblock.locator(), "3858f62230ac3c915f300c664312c63f+6")
+            self.assertEqual(bufferblock.size(), 6)
+            self.assertEqual(bufferblock.buffer_view[0:6], "foobar")
+            self.assertEqual(bufferblock.locator(), "3858f62230ac3c915f300c664312c63f+6")
 
-        bufferblock.set_state(arvados.arvfile._BufferBlock.PENDING)
-        with self.assertRaises(arvados.errors.AssertionError):
-            bufferblock.append("bar")
+            bufferblock.set_state(arvados.arvfile._BufferBlock.PENDING)
+            with self.assertRaises(arvados.errors.AssertionError):
+                bufferblock.append("bar")
 
     def test_bufferblock_dup(self):
         keep = ArvadosFileWriterTestCase.MockKeep({})
-        blockmanager = arvados.arvfile._BlockManager(keep)
-        bufferblock = blockmanager.alloc_bufferblock()
-        bufferblock.append("foo")
+        with arvados.arvfile._BlockManager(keep) as blockmanager:
+            bufferblock = blockmanager.alloc_bufferblock()
+            bufferblock.append("foo")
 
-        self.assertEqual(bufferblock.size(), 3)
-        self.assertEqual(bufferblock.buffer_view[0:3], "foo")
-        self.assertEqual(bufferblock.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")
-        bufferblock.set_state(arvados.arvfile._BufferBlock.PENDING)
+            self.assertEqual(bufferblock.size(), 3)
+            self.assertEqual(bufferblock.buffer_view[0:3], "foo")
+            self.assertEqual(bufferblock.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")
+            bufferblock.set_state(arvados.arvfile._BufferBlock.PENDING)
 
-        bufferblock2 = blockmanager.dup_block(bufferblock, None)
-        self.assertNotEqual(bufferblock.blockid, bufferblock2.blockid)
+            bufferblock2 = blockmanager.dup_block(bufferblock, None)
+            self.assertNotEqual(bufferblock.blockid, bufferblock2.blockid)
 
-        bufferblock2.append("bar")
+            bufferblock2.append("bar")
 
-        self.assertEqual(bufferblock2.size(), 6)
-        self.assertEqual(bufferblock2.buffer_view[0:6], "foobar")
-        self.assertEqual(bufferblock2.locator(), "3858f62230ac3c915f300c664312c63f+6")
+            self.assertEqual(bufferblock2.size(), 6)
+            self.assertEqual(bufferblock2.buffer_view[0:6], "foobar")
+            self.assertEqual(bufferblock2.locator(), "3858f62230ac3c915f300c664312c63f+6")
 
-        self.assertEqual(bufferblock.size(), 3)
-        self.assertEqual(bufferblock.buffer_view[0:3], "foo")
-        self.assertEqual(bufferblock.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")
+            self.assertEqual(bufferblock.size(), 3)
+            self.assertEqual(bufferblock.buffer_view[0:3], "foo")
+            self.assertEqual(bufferblock.locator(), "acbd18db4cc2f85cedef654fccc4a4d8+3")
 
     def test_bufferblock_get(self):
         keep = ArvadosFileWriterTestCase.MockKeep({"781e5e245d69b566979b86e28d23f2c7+10": "0123456789"})
-        blockmanager = arvados.arvfile._BlockManager(keep)
-        bufferblock = blockmanager.alloc_bufferblock()
-        bufferblock.append("foo")
+        with arvados.arvfile._BlockManager(keep) as blockmanager:
+            bufferblock = blockmanager.alloc_bufferblock()
+            bufferblock.append("foo")
 
-        self.assertEqual(blockmanager.get_block_contents("781e5e245d69b566979b86e28d23f2c7+10", 1), "0123456789")
-        self.assertEqual(blockmanager.get_block_contents(bufferblock.blockid, 1), "foo")
+            self.assertEqual(blockmanager.get_block_contents("781e5e245d69b566979b86e28d23f2c7+10", 1), "0123456789")
+            self.assertEqual(blockmanager.get_block_contents(bufferblock.blockid, 1), "foo")
 
     def test_bufferblock_commit(self):
         mockkeep = mock.MagicMock()
-        blockmanager = arvados.arvfile._BlockManager(mockkeep)
-        bufferblock = blockmanager.alloc_bufferblock()
-        bufferblock.append("foo")
-        blockmanager.commit_all()
-        self.assertTrue(mockkeep.put.called)
-        self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.COMMITTED)
-        self.assertIsNone(bufferblock.buffer_view)
+        with arvados.arvfile._BlockManager(mockkeep) as blockmanager:
+            bufferblock = blockmanager.alloc_bufferblock()
+            bufferblock.owner = mock.MagicMock()
+            def flush(sync=None):
+                blockmanager.commit_bufferblock(bufferblock, sync)
+            bufferblock.owner.flush.side_effect = flush
+            bufferblock.append("foo")
+            blockmanager.commit_all()
+            self.assertTrue(bufferblock.owner.flush.called)
+            self.assertTrue(mockkeep.put.called)
+            self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.COMMITTED)
+            self.assertIsNone(bufferblock.buffer_view)
 
 
     def test_bufferblock_commit_with_error(self):
         mockkeep = mock.MagicMock()
         mockkeep.put.side_effect = arvados.errors.KeepWriteError("fail")
-        blockmanager = arvados.arvfile._BlockManager(mockkeep)
-        bufferblock = blockmanager.alloc_bufferblock()
-        bufferblock.append("foo")
-        with self.assertRaises(arvados.errors.KeepWriteError) as err:
-            blockmanager.commit_all()
-        self.assertEqual(str(err.exception), "Error writing some blocks: block acbd18db4cc2f85cedef654fccc4a4d8+3 raised KeepWriteError (fail)")
-        self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.PENDING)
+        with arvados.arvfile._BlockManager(mockkeep) as blockmanager:
+            bufferblock = blockmanager.alloc_bufferblock()
+            bufferblock.owner = mock.MagicMock()
+            def flush(sync=None):
+                blockmanager.commit_bufferblock(bufferblock, sync)
+            bufferblock.owner.flush.side_effect = flush
+            bufferblock.append("foo")
+            with self.assertRaises(arvados.errors.KeepWriteError) as err:
+                blockmanager.commit_all()
+            self.assertTrue(bufferblock.owner.flush.called)
+            self.assertEqual(str(err.exception), "Error writing some blocks: block acbd18db4cc2f85cedef654fccc4a4d8+3 raised KeepWriteError (fail)")
+            self.assertEqual(bufferblock.state(), arvados.arvfile._BufferBlock.ERROR)
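These BlockManager tests now wrap the manager in a with-statement, which
(per the context-manager support these tests rely on) stops the manager's
background put threads on exit even when an assertion fails mid-block. A
minimal sketch of the pattern, assuming a keep client named keep:

    with arvados.arvfile._BlockManager(keep) as blockmanager:
        bufferblock = blockmanager.alloc_bufferblock()
        bufferblock.append("data")
    # worker threads are stopped here, on success or failure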
diff --git a/sdk/python/tests/test_benchmark_collections.py b/sdk/python/tests/test_benchmark_collections.py
new file mode 100644
index 0000000..d75ad47
--- /dev/null
@@ -0,0 +1,97 @@
+import arvados
+import sys
+
+import run_test_server
+import arvados_testutil as tutil
+import manifest_examples
+from performance.performance_profiler import profiled
+
+class CollectionBenchmark(run_test_server.TestCaseWithServers,
+                          tutil.ArvadosBaseTestCase,
+                          manifest_examples.ManifestExamples):
+    MAIN_SERVER = {}
+    TEST_BLOCK_SIZE = 0
+
+    @classmethod
+    def list_recursive(cls, coll, parent_name=None):
+        if parent_name is None:
+            current_name = coll.stream_name()
+        else:
+            current_name = '{}/{}'.format(parent_name, coll.name)
+        try:
+            for name in coll:
+                for item in cls.list_recursive(coll[name], current_name):
+                    yield item
+        except TypeError:
+            yield current_name
+
+    @classmethod
+    def setUpClass(cls):
+        super(CollectionBenchmark, cls).setUpClass()
+        run_test_server.authorize_with('active')
+        cls.api_client = arvados.api('v1')
+        cls.keep_client = arvados.KeepClient(api_client=cls.api_client,
+                                             local_store=cls.local_store)
+
+    @profiled
+    def profile_new_collection_from_manifest(self, manifest_text):
+        return arvados.collection.Collection(manifest_text)
+
+    @profiled
+    def profile_new_collection_from_server(self, uuid):
+        return arvados.collection.Collection(uuid)
+
+    @profiled
+    def profile_new_collection_copying_bytes_from_collection(self, src):
+        dst = arvados.collection.Collection()
+        with tutil.mock_keep_responses('x'*self.TEST_BLOCK_SIZE, 200):
+            for name in self.list_recursive(src):
+                with src.open(name) as srcfile, dst.open(name, 'w') as dstfile:
+                    dstfile.write(srcfile.read())
+            dst.save_new()
+
+    @profiled
+    def profile_new_collection_copying_files_from_collection(self, src):
+        dst = arvados.collection.Collection()
+        with tutil.mock_keep_responses('x'*self.TEST_BLOCK_SIZE, 200):
+            for name in self.list_recursive(src):
+                dst.copy(name, name, src)
+            dst.save_new()
+
+    @profiled
+    def profile_collection_list_files(self, coll):
+        return sum(1 for name in self.list_recursive(coll))
+
+    def test_medium_sized_manifest(self):
+        """Exercise manifest-handling code.
+
+        Currently, this test puts undue emphasis on some code paths
+        that don't reflect typical use because the contrived example
+        manifest has some unusual characteristics:
+
+        * Block size is zero.
+
+        * Every block is identical, so block caching patterns are
+          unrealistic.
+
+        * Every file begins and ends at a block boundary.
+        """
+        specs = {
+            'streams': 100,
+            'files_per_stream': 100,
+            'blocks_per_file': 20,
+            'bytes_per_block': self.TEST_BLOCK_SIZE,
+        }
+        my_manifest = self.make_manifest(**specs)
+
+        coll = self.profile_new_collection_from_manifest(my_manifest)
+
+        coll.save_new()
+        self.profile_new_collection_from_server(coll.manifest_locator())
+
+        num_items = self.profile_collection_list_files(coll)
+        self.assertEqual(num_items, specs['streams'] * specs['files_per_stream'])
+
+        self.profile_new_collection_copying_bytes_from_collection(coll)
+
+        self.profile_new_collection_copying_files_from_collection(coll)
index 8cf34f0282655fd88f27330ec96e5e0b362370cc..13fc88def303c28d4161e3e4e3d080b9cb17cce6 100644
@@ -551,7 +551,7 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
     def test_locator_init(self):
         client = self.api_client_mock(200)
         # Ensure Keep will not return anything if asked.
-        with tutil.mock_get_responses(None, 404):
+        with tutil.mock_keep_responses(None, 404):
             reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH,
                                               api_client=client)
             self.assertEqual(self.DEFAULT_MANIFEST, reader.manifest_text())
@@ -561,7 +561,7 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
         # been written to Keep.
         client = self.api_client_mock(200)
         self.mock_get_collection(client, 404, None)
-        with tutil.mock_get_responses(self.DEFAULT_MANIFEST, 200):
+        with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
             reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH,
                                               api_client=client)
             self.assertEqual(self.DEFAULT_MANIFEST, reader.manifest_text())
@@ -569,7 +569,7 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
     def test_uuid_init_no_fallback_to_keep(self):
         # Do not look up a collection UUID in Keep.
         client = self.api_client_mock(404)
-        with tutil.mock_get_responses(self.DEFAULT_MANIFEST, 200):
+        with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
             with self.assertRaises(arvados.errors.ApiError):
                 reader = arvados.CollectionReader(self.DEFAULT_UUID,
                                                   api_client=client)
@@ -578,7 +578,7 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
         # To verify that CollectionReader tries Keep first here, we
         # mock API server to return the wrong data.
         client = self.api_client_mock(200)
-        with tutil.mock_get_responses(self.ALT_MANIFEST, 200):
+        with tutil.mock_keep_responses(self.ALT_MANIFEST, 200):
             self.assertEqual(
                 self.ALT_MANIFEST,
                 arvados.CollectionReader(
@@ -590,7 +590,7 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
         client = self.api_client_mock(200)
         reader = arvados.CollectionReader(self.DEFAULT_UUID, api_client=client,
                                           num_retries=3)
-        with tutil.mock_get_responses('foo', 500, 500, 200):
+        with tutil.mock_keep_responses('foo', 500, 500, 200):
             self.assertEqual('foo',
                              ''.join(f.read(9) for f in reader.all_files()))
 
@@ -630,7 +630,7 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
     def test_api_response_with_collection_from_keep(self):
         client = self.api_client_mock()
         self.mock_get_collection(client, 404, 'foo')
-        with tutil.mock_get_responses(self.DEFAULT_MANIFEST, 200):
+        with tutil.mock_keep_responses(self.DEFAULT_MANIFEST, 200):
             reader = arvados.CollectionReader(self.DEFAULT_DATA_HASH,
                                               api_client=client)
             api_response = reader.api_response()
@@ -673,7 +673,7 @@ class CollectionReaderTestCase(unittest.TestCase, CollectionTestMixin):
 class CollectionWriterTestCase(unittest.TestCase, CollectionTestMixin):
     def mock_keep(self, body, *codes, **headers):
         headers.setdefault('x-keep-replicas-stored', 2)
-        return tutil.mock_put_responses(body, *codes, **headers)
+        return tutil.mock_keep_responses(body, *codes, **headers)
 
     def foo_writer(self, **kwargs):
         kwargs.setdefault('api_client', self.api_client_mock())
@@ -695,7 +695,7 @@ class CollectionWriterTestCase(unittest.TestCase, CollectionTestMixin):
 
     def test_write_insufficient_replicas_via_proxy(self):
         writer = self.foo_writer(replication=3)
-        with self.mock_keep(None, 200, headers={'x-keep-replicas-stored': 2}):
+        with self.mock_keep(None, 200, **{'x-keep-replicas-stored': 2}):
             with self.assertRaises(arvados.errors.KeepWriteError):
                 writer.manifest_text()
 
@@ -712,15 +712,12 @@ class CollectionWriterTestCase(unittest.TestCase, CollectionTestMixin):
     def test_write_three_replicas(self):
         client = mock.MagicMock(name='api_client')
         with self.mock_keep(
-                None, 500, 500, 500, 200, 200, 200,
+                "", 500, 500, 500, 200, 200, 200,
                 **{'x-keep-replicas-stored': 1}) as keepmock:
             self.mock_keep_services(client, status=200, service_type='disk', count=6)
             writer = self.foo_writer(api_client=client, replication=3)
             writer.manifest_text()
-            # keepmock is the mock session constructor; keepmock.return_value
-            # is the mock session object, and keepmock.return_value.put is the
-            # actual mock method of interest.
-            self.assertEqual(6, keepmock.return_value.put.call_count)
+            self.assertEqual(6, keepmock.call_count)
 
     def test_write_whole_collection_through_retries(self):
         writer = self.foo_writer(num_retries=2)
@@ -830,11 +827,11 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
 
     def test_remove(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n')
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())
         self.assertIn("count1.txt", c)
         c.remove("count1.txt")
         self.assertNotIn("count1.txt", c)
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.portable_manifest_text())
         with self.assertRaises(arvados.errors.ArgumentError):
             c.remove("")
 
@@ -851,45 +848,63 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
     def test_remove_in_subdir(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
         c.remove("foo/count2.txt")
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
 
     def test_remove_empty_subdir(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
         c.remove("foo/count2.txt")
         c.remove("foo")
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
 
     def test_remove_nonempty_subdir(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
         with self.assertRaises(IOError):
             c.remove("foo")
         c.remove("foo", recursive=True)
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
 
     def test_copy_to_file_in_dir(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
         c.copy("count1.txt", "foo/count2.txt")
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.portable_manifest_text())
 
     def test_copy_file(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
         c.copy("count1.txt", "count2.txt")
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())
 
     def test_copy_to_existing_dir(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
         c.copy("count1.txt", "foo")
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt 0:10:count2.txt\n", c.portable_manifest_text())
 
     def test_copy_to_new_dir(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
         c.copy("count1.txt", "foo/")
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n", c.portable_manifest_text())
+
+    def test_rename_file(self):
+        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
+        c.rename("count1.txt", "count2.txt")
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.manifest_text())
+
+    def test_move_file_to_dir(self):
+        c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
+        c.mkdirs("foo")
+        c.rename("count1.txt", "foo/count2.txt")
+        self.assertEqual("./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c.manifest_text())
+
+    def test_move_file_to_other(self):
+        c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
+        c2 = Collection()
+        c2.rename("count1.txt", "count2.txt", source_collection=c1)
+        self.assertEqual("", c1.manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", c2.manifest_text())
 
     def test_clone(self):
         c = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
         cl = c.clone()
-        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", cl.manifest_text())
+        self.assertEqual(". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n", cl.portable_manifest_text())
 
     def test_diff_del_add(self):
         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
@@ -900,9 +915,9 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         d = c1.diff(c2)
         self.assertEqual(d, [('del', './count1.txt', c1["count1.txt"]),
                              ('add', './count2.txt', c2["count2.txt"])])
-        self.assertNotEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
         c1.apply(d)
-        self.assertEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
 
     def test_diff_same(self):
         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
@@ -912,9 +927,9 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         d = c1.diff(c2)
         self.assertEqual(d, [])
 
-        self.assertEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
         c1.apply(d)
-        self.assertEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
 
     def test_diff_mod(self):
         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
@@ -924,9 +939,9 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         d = c1.diff(c2)
         self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])
 
-        self.assertNotEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
         c1.apply(d)
-        self.assertEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
 
     def test_diff_add(self):
         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
@@ -936,9 +951,9 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         d = c1.diff(c2)
         self.assertEqual(d, [('add', './count2.txt', c2["count2.txt"])])
 
-        self.assertNotEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
         c1.apply(d)
-        self.assertEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
 
     def test_diff_add_in_subcollection(self):
         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
@@ -948,9 +963,9 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         d = c1.diff(c2)
         self.assertEqual(d, [('add', './foo', c2["foo"])])
 
-        self.assertNotEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
         c1.apply(d)
-        self.assertEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
 
     def test_diff_del_add_in_subcollection(self):
         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
@@ -963,9 +978,9 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         self.assertEqual(d, [('del', './foo/count2.txt', c1.find("foo/count2.txt")),
                              ('add', './foo/count3.txt', c2.find("foo/count3.txt"))])
 
-        self.assertNotEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
         c1.apply(d)
-        self.assertEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
 
     def test_diff_mod_in_subcollection(self):
         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n./foo 5348b82a029fd9e971a811ce1f71360b+43 0:10:count2.txt\n')
@@ -975,9 +990,9 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         d = c1.diff(c2)
         self.assertEqual(d, [('mod', './foo', c1["foo"], c2["foo"])])
 
-        self.assertNotEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertNotEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
         c1.apply(d)
-        self.assertEqual(c1.manifest_text(), c2.manifest_text())
+        self.assertEqual(c1.portable_manifest_text(), c2.portable_manifest_text())
 
     def test_conflict_keep_local_change(self):
         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt\n')
@@ -985,24 +1000,24 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         d = c1.diff(c2)
         self.assertEqual(d, [('del', './count1.txt', c1["count1.txt"]),
                              ('add', './count2.txt', c2["count2.txt"])])
-        with c1.open("count1.txt", "w") as f:
-            f.write("zzzzz")
+        f = c1.open("count1.txt", "w")
+        f.write("zzzzz")
 
         # c1 changed, so it should not be deleted.
         c1.apply(d)
-        self.assertEqual(c1.manifest_text(), ". 95ebc3c7b3b9f1d2c40fec14415d3cb8+5 5348b82a029fd9e971a811ce1f71360b+43 0:5:count1.txt 5:10:count2.txt\n")
+        self.assertEqual(c1.portable_manifest_text(), ". 95ebc3c7b3b9f1d2c40fec14415d3cb8+5 5348b82a029fd9e971a811ce1f71360b+43 0:5:count1.txt 5:10:count2.txt\n")
 
     def test_conflict_mod(self):
         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt')
         c2 = Collection('. 5348b82a029fd9e971a811ce1f71360b+43 0:10:count1.txt')
         d = c1.diff(c2)
         self.assertEqual(d, [('mod', './count1.txt', c1["count1.txt"], c2["count1.txt"])])
-        with c1.open("count1.txt", "w") as f:
-            f.write("zzzzz")
+        f = c1.open("count1.txt", "w")
+        f.write("zzzzz")
 
         # c1 changed, so c2 mod will go to a conflict file
         c1.apply(d)
-        self.assertRegexpMatches(c1.manifest_text(), r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~conflict-\d\d\d\d-\d\d-\d\d-\d\d:\d\d:\d\d~$")
+        self.assertRegexpMatches(c1.portable_manifest_text(), r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
 
     def test_conflict_add(self):
         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count2.txt\n')
@@ -1010,12 +1025,12 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
         d = c1.diff(c2)
         self.assertEqual(d, [('del', './count2.txt', c1["count2.txt"]),
                              ('add', './count1.txt', c2["count1.txt"])])
-        with c1.open("count1.txt", "w") as f:
-            f.write("zzzzz")
+        f = c1.open("count1.txt", "w")
+        f.write("zzzzz")
 
         # c1 added count1.txt, so c2 add will go to a conflict file
         c1.apply(d)
-        self.assertRegexpMatches(c1.manifest_text(), r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~conflict-\d\d\d\d-\d\d-\d\d-\d\d:\d\d:\d\d~$")
+        self.assertRegexpMatches(c1.portable_manifest_text(), r"\. 95ebc3c7b3b9f1d2c40fec14415d3cb8\+5 5348b82a029fd9e971a811ce1f71360b\+43 0:5:count1\.txt 5:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
 
     def test_conflict_del(self):
         c1 = Collection('. 781e5e245d69b566979b86e28d23f2c7+10 0:10:count1.txt')
@@ -1026,7 +1041,7 @@ class NewCollectionTestCase(unittest.TestCase, CollectionTestMixin):
 
         # c1 deleted, so c2 mod will go to a conflict file
         c1.apply(d)
-        self.assertRegexpMatches(c1.manifest_text(), r"\. 5348b82a029fd9e971a811ce1f71360b\+43 0:10:count1\.txt~conflict-\d\d\d\d-\d\d-\d\d-\d\d:\d\d:\d\d~$")
+        self.assertRegexpMatches(c1.portable_manifest_text(), r"\. 5348b82a029fd9e971a811ce1f71360b\+43 0:10:count1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
 
     def test_notify(self):
         c1 = Collection()
@@ -1058,7 +1073,7 @@ class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
         with c.open("count.txt", "w") as f:
             f.write("0123456789")
 
-        self.assertEqual(c.manifest_text(), ". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n")
+        self.assertEqual(c.portable_manifest_text(), ". 781e5e245d69b566979b86e28d23f2c7+10 0:10:count.txt\n")
 
         return c
 
@@ -1067,7 +1082,6 @@ class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
         c.save()
         self.assertRegexpMatches(c.manifest_text(), r"^\. 781e5e245d69b566979b86e28d23f2c7\+10\+A[a-f0-9]{40}@[a-f0-9]{8} 0:10:count\.txt$",)
 
-
     def test_create_and_save_new(self):
         c = self.create_count_txt()
         c.save_new()
@@ -1130,7 +1144,7 @@ class CollectionCreateUpdateTest(run_test_server.TestCaseWithServers):
         c2.save()
 
         c1.update()
-        self.assertRegexpMatches(c1.manifest_text(), r"\. e65075d550f9b5bf9992fa1d71a131be\+3 7ac66c0f148de9519b8bd264312c4d64\+7\+A[a-f0-9]{40}@[a-f0-9]{8} 0:3:count\.txt 3:7:count\.txt~conflict-\d\d\d\d-\d\d-\d\d-\d\d:\d\d:\d\d~$")
+        self.assertRegexpMatches(c1.manifest_text(), r"\. e65075d550f9b5bf9992fa1d71a131be\+3 7ac66c0f148de9519b8bd264312c4d64\+7\+A[a-f0-9]{40}@[a-f0-9]{8} 0:3:count\.txt 3:7:count\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~$")
 
 
 if __name__ == '__main__':
index 6e8df96316aa36510828515d08cb7c3562dcfe99..db875dc212a1567638ad82155817078e0da48773 100644 (file)
@@ -10,8 +10,8 @@ class KeepRequestErrorTestCase(unittest.TestCase):
     REQUEST_ERRORS = [
         ('http://keep1.zzzzz.example.org/', IOError("test IOError")),
         ('http://keep3.zzzzz.example.org/', MemoryError("test MemoryError")),
-        ('http://keep5.zzzzz.example.org/', tutil.fake_requests_response(
-                500, "test 500")),
+        ('http://keep5.zzzzz.example.org/',
+         arv_error.HttpError(500, "Internal Server Error")),
         ('http://keep7.zzzzz.example.org/', IOError("second test IOError")),
         ]
 
index baae28e3d78ff8de224dfaf509856cd625dbe8f4..c44379bac79465417e9a7d128d1aa47f13d6a6fa 100644 (file)
@@ -1,15 +1,19 @@
 import hashlib
 import mock
 import os
+import pycurl
 import random
 import re
 import socket
+import threading
+import time
 import unittest
 import urlparse
 
 import arvados
 import arvados.retry
 import arvados_testutil as tutil
+import keepstub
 import run_test_server
 
 class KeepTestCase(run_test_server.TestCaseWithServers):
@@ -251,7 +255,7 @@ class KeepProxyTestCase(run_test_server.TestCaseWithServers):
 class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
     def get_service_roots(self, api_client):
         keep_client = arvados.KeepClient(api_client=api_client)
-        services = keep_client.weighted_service_roots('000000')
+        services = keep_client.weighted_service_roots(arvados.KeepLocator('0'*32))
         return [urlparse.urlparse(url) for url in sorted(services)]
 
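The hunk above now passes an arvados.KeepLocator instead of a bare hash string. A minimal construction sketch (the constructor call is taken from these tests; the str() round-trip is an assumption):

    import arvados

    # KeepLocator wraps a "<md5hex>[+size[+hints...]]" locator string,
    # as passed to weighted_service_roots() above.
    loc = arvados.KeepLocator('acbd18db4cc2f85cedef654fccc4a4d8+3')
    print(str(loc))  # assumed to reproduce the locator string
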
     def test_ssl_flag_respected_in_roots(self):
@@ -267,63 +271,66 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
         self.assertEqual('100::1', service.hostname)
         self.assertEqual(10, service.port)
 
-    # test_get_timeout and test_put_timeout test that
-    # KeepClient.get and KeepClient.put use the appropriate timeouts
-    # when connected directly to a Keep server (i.e. non-proxy timeout)
+    # The test_*_timeout tests verify that KeepClient instructs pycurl to use
+    # the appropriate connection and read timeouts. They don't care
+    # whether pycurl actually exhibits the expected timeout behavior
+    # -- those tests are in the KeepClientTimeout test class.
 
     def test_get_timeout(self):
         api_client = self.mock_keep_services(count=1)
-        force_timeout = [socket.timeout("timed out")]
-        with tutil.mock_get(force_timeout) as mock_session:
+        force_timeout = socket.timeout("timed out")
+        with tutil.mock_keep_responses(force_timeout, 0) as mock:
             keep_client = arvados.KeepClient(api_client=api_client)
             with self.assertRaises(arvados.errors.KeepReadError):
                 keep_client.get('ffffffffffffffffffffffffffffffff')
-            self.assertTrue(mock_session.return_value.get.called)
             self.assertEqual(
-                arvados.KeepClient.DEFAULT_TIMEOUT,
-                mock_session.return_value.get.call_args[1]['timeout'])
+                mock.responses[0].getopt(pycurl.CONNECTTIMEOUT_MS),
+                int(arvados.KeepClient.DEFAULT_TIMEOUT[0]*1000))
+            self.assertEqual(
+                mock.responses[0].getopt(pycurl.TIMEOUT_MS),
+                int(arvados.KeepClient.DEFAULT_TIMEOUT[1]*1000))
 
     def test_put_timeout(self):
         api_client = self.mock_keep_services(count=1)
-        force_timeout = [socket.timeout("timed out")]
-        with tutil.mock_put(force_timeout) as mock_session:
+        force_timeout = socket.timeout("timed out")
+        with tutil.mock_keep_responses(force_timeout, 0) as mock:
             keep_client = arvados.KeepClient(api_client=api_client)
             with self.assertRaises(arvados.errors.KeepWriteError):
                 keep_client.put('foo')
-            self.assertTrue(mock_session.return_value.put.called)
             self.assertEqual(
-                arvados.KeepClient.DEFAULT_TIMEOUT,
-                mock_session.return_value.put.call_args[1]['timeout'])
+                mock.responses[0].getopt(pycurl.CONNECTTIMEOUT_MS),
+                int(arvados.KeepClient.DEFAULT_TIMEOUT[0]*1000))
+            self.assertEqual(
+                mock.responses[0].getopt(pycurl.TIMEOUT_MS),
+                int(arvados.KeepClient.DEFAULT_TIMEOUT[1]*1000))
 
     def test_proxy_get_timeout(self):
-        # Force a timeout, verifying that the requests.get or
-        # requests.put method was called with the proxy_timeout
-        # setting rather than the default timeout.
         api_client = self.mock_keep_services(service_type='proxy', count=1)
-        force_timeout = [socket.timeout("timed out")]
-        with tutil.mock_get(force_timeout) as mock_session:
+        force_timeout = socket.timeout("timed out")
+        with tutil.mock_keep_responses(force_timeout, 0) as mock:
             keep_client = arvados.KeepClient(api_client=api_client)
             with self.assertRaises(arvados.errors.KeepReadError):
                 keep_client.get('ffffffffffffffffffffffffffffffff')
-            self.assertTrue(mock_session.return_value.get.called)
             self.assertEqual(
-                arvados.KeepClient.DEFAULT_PROXY_TIMEOUT,
-                mock_session.return_value.get.call_args[1]['timeout'])
+                mock.responses[0].getopt(pycurl.CONNECTTIMEOUT_MS),
+                int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[0]*1000))
+            self.assertEqual(
+                mock.responses[0].getopt(pycurl.TIMEOUT_MS),
+                int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[1]*1000))
 
     def test_proxy_put_timeout(self):
-        # Force a timeout, verifying that the requests.get or
-        # requests.put method was called with the proxy_timeout
-        # setting rather than the default timeout.
         api_client = self.mock_keep_services(service_type='proxy', count=1)
-        force_timeout = [socket.timeout("timed out")]
-        with tutil.mock_put(force_timeout) as mock_session:
+        force_timeout = socket.timeout("timed out")
+        with tutil.mock_keep_responses(force_timeout, 0) as mock:
             keep_client = arvados.KeepClient(api_client=api_client)
             with self.assertRaises(arvados.errors.KeepWriteError):
                 keep_client.put('foo')
-            self.assertTrue(mock_session.return_value.put.called)
             self.assertEqual(
-                arvados.KeepClient.DEFAULT_PROXY_TIMEOUT,
-                mock_session.return_value.put.call_args[1]['timeout'])
+                mock.responses[0].getopt(pycurl.CONNECTTIMEOUT_MS),
+                int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[0]*1000))
+            self.assertEqual(
+                mock.responses[0].getopt(pycurl.TIMEOUT_MS),
+                int(arvados.KeepClient.DEFAULT_PROXY_TIMEOUT[1]*1000))
 
     def test_probe_order_reference_set(self):
         # expected_order[i] is the probe order for
@@ -344,7 +351,7 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
         api_client = self.mock_keep_services(count=16)
         keep_client = arvados.KeepClient(api_client=api_client)
         for i, hash in enumerate(hashes):
-            roots = keep_client.weighted_service_roots(hash)
+            roots = keep_client.weighted_service_roots(arvados.KeepLocator(hash))
             got_order = [
                 re.search(r'//\[?keep0x([0-9a-f]+)', root).group(1)
                 for root in roots]
@@ -357,14 +364,14 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
         api_client = self.mock_keep_services(count=initial_services)
         keep_client = arvados.KeepClient(api_client=api_client)
         probes_before = [
-            keep_client.weighted_service_roots(hash) for hash in hashes]
+            keep_client.weighted_service_roots(arvados.KeepLocator(hash)) for hash in hashes]
         for added_services in range(1, 12):
             api_client = self.mock_keep_services(count=initial_services+added_services)
             keep_client = arvados.KeepClient(api_client=api_client)
             total_penalty = 0
             for hash_index in range(len(hashes)):
                 probe_after = keep_client.weighted_service_roots(
-                    hashes[hash_index])
+                    arvados.KeepLocator(hashes[hash_index]))
                 penalty = probe_after.index(probes_before[hash_index][0])
                 self.assertLessEqual(penalty, added_services)
                 total_penalty += penalty
@@ -397,9 +404,9 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
         aport = random.randint(1024,65535)
         api_client = self.mock_keep_services(service_port=aport, count=16)
         keep_client = arvados.KeepClient(api_client=api_client)
-        with mock.patch('requests.' + verb,
-                        side_effect=socket.timeout) as req_mock, \
-                self.assertRaises(exc_class) as err_check:
+        with mock.patch('pycurl.Curl') as curl_mock, \
+             self.assertRaises(exc_class) as err_check:
+            curl_mock.return_value.side_effect = socket.timeout
             getattr(keep_client, verb)(data)
         urls = [urlparse.urlparse(url)
                 for url in err_check.exception.request_errors()]
@@ -429,7 +436,7 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
 
     def check_errors_from_last_retry(self, verb, exc_class):
         api_client = self.mock_keep_services(count=2)
-        req_mock = getattr(tutil, 'mock_{}_responses'.format(verb))(
+        req_mock = tutil.mock_keep_responses(
             "retry error reporting test", 500, 500, 403, 403)
         with req_mock, tutil.skip_sleep, \
                 self.assertRaises(exc_class) as err_check:
@@ -450,12 +457,169 @@ class KeepClientServiceTestCase(unittest.TestCase, tutil.ApiClientMock):
         data = 'partial failure test'
         data_loc = '{}+{}'.format(hashlib.md5(data).hexdigest(), len(data))
         api_client = self.mock_keep_services(count=3)
-        with tutil.mock_put_responses(data_loc, 200, 500, 500) as req_mock, \
+        with tutil.mock_keep_responses(data_loc, 200, 500, 500) as req_mock, \
                 self.assertRaises(arvados.errors.KeepWriteError) as exc_check:
             keep_client = arvados.KeepClient(api_client=api_client)
             keep_client.put(data)
         self.assertEqual(2, len(exc_check.exception.request_errors()))
 
+    def test_proxy_put_with_no_writable_services(self):
+        data = 'test with no writable services'
+        data_loc = '{}+{}'.format(hashlib.md5(data).hexdigest(), len(data))
+        api_client = self.mock_keep_services(service_type='proxy', read_only=True, count=1)
+        with tutil.mock_keep_responses(data_loc, 200, 500, 500) as req_mock, \
+                self.assertRaises(arvados.errors.KeepWriteError) as exc_check:
+            keep_client = arvados.KeepClient(api_client=api_client)
+            keep_client.put(data)
+        self.assertIn("no Keep services available", str(exc_check.exception))
+        self.assertEqual(0, len(exc_check.exception.request_errors()))
+
+class KeepClientTimeout(unittest.TestCase, tutil.ApiClientMock):
+    DATA = 'x' * 2**10
+
+    class assertTakesBetween(unittest.TestCase):
+        def __init__(self, tmin, tmax):
+            self.tmin = tmin
+            self.tmax = tmax
+
+        def __enter__(self):
+            self.t0 = time.time()
+
+        def __exit__(self, *args, **kwargs):
+            self.assertGreater(time.time() - self.t0, self.tmin)
+            self.assertLess(time.time() - self.t0, self.tmax)
+
+    def setUp(self):
+        sock = socket.socket()
+        sock.bind(('0.0.0.0', 0))
+        self.port = sock.getsockname()[1]
+        sock.close()
+        self.server = keepstub.Server(('0.0.0.0', self.port), keepstub.Handler)
+        self.thread = threading.Thread(target=self.server.serve_forever)
+        self.thread.daemon = True # Exit thread if main proc exits
+        self.thread.start()
+        self.api_client = self.mock_keep_services(
+            count=1,
+            service_host='localhost',
+            service_port=self.port,
+        )
+
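setUp picks a free port by binding to port 0 and closing the socket before the stub server binds it for real. The same trick in isolation (inherently racy: another process can claim the port between close() and the server's own bind):

    import socket

    sock = socket.socket()
    sock.bind(('127.0.0.1', 0))      # port 0: the OS assigns a free port
    port = sock.getsockname()[1]
    sock.close()
    print(port)
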
+    def tearDown(self):
+        self.server.shutdown()
+
+    def keepClient(self, timeouts=(0.1, 1.0)):
+        return arvados.KeepClient(
+            api_client=self.api_client,
+            timeout=timeouts)
+
+    def test_timeout_slow_connect(self):
+        # Can't simulate TCP delays with our own socket. Leave our
+        # stub server running uselessly, and try to connect to an
+        # unroutable IP address instead.
+        self.api_client = self.mock_keep_services(
+            count=1,
+            service_host='240.0.0.0',
+        )
+        with self.assertTakesBetween(0.1, 0.5):
+            with self.assertRaises(arvados.errors.KeepWriteError):
+                self.keepClient((0.1, 1)).put(self.DATA, copies=1, num_retries=0)
+
+    def test_timeout_slow_request(self):
+        self.server.setdelays(request=0.2)
+        self._test_200ms()
+
+    def test_timeout_slow_response(self):
+        self.server.setdelays(response=0.2)
+        self._test_200ms()
+
+    def test_timeout_slow_response_body(self):
+        self.server.setdelays(response_body=0.2)
+        self._test_200ms()
+
+    def _test_200ms(self):
+        """Connect should be t<100ms, request should be 200ms <= t < 300ms"""
+
+        # Allow 100ms to connect, then 1s for response. Everything
+        # should work, and everything should take at least 200ms to
+        # return.
+        kc = self.keepClient((.1, 1))
+        with self.assertTakesBetween(.2, .3):
+            loc = kc.put(self.DATA, copies=1, num_retries=0)
+        with self.assertTakesBetween(.2, .3):
+            self.assertEqual(self.DATA, kc.get(loc, num_retries=0))
+
+        # Allow 1s to connect, then 100ms for response. Nothing should
+        # work, and everything should take at least 100ms to return.
+        kc = self.keepClient((1, .1))
+        with self.assertTakesBetween(.1, .2):
+            with self.assertRaises(arvados.errors.KeepReadError):
+                kc.get(loc, num_retries=0)
+        with self.assertTakesBetween(.1, .2):
+            with self.assertRaises(arvados.errors.KeepWriteError):
+                kc.put(self.DATA, copies=1, num_retries=0)
+
+
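test_timeout_slow_connect uses 240.0.0.0 because that address space is reserved and unroutable, so the TCP handshake never completes and only the connect timeout can fire. The trick in isolation (port arbitrary; on networks that reject rather than silently drop the packets, connect() fails fast with a socket.error instead of timing out):

    import socket
    import time

    s = socket.socket()
    s.settimeout(0.1)                    # matches the test's 0.1s connect timeout
    t0 = time.time()
    try:
        s.connect(('240.0.0.0', 25107))
    except socket.error:                 # socket.timeout is a subclass
        print('gave up after %.2fs' % (time.time() - t0))
    finally:
        s.close()
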
+class KeepClientGatewayTestCase(unittest.TestCase, tutil.ApiClientMock):
+    def mock_disks_and_gateways(self, disks=3, gateways=1):
+        self.gateways = [{
+                'uuid': 'zzzzz-bi6l4-gateway{:08d}'.format(i),
+                'owner_uuid': 'zzzzz-tpzed-000000000000000',
+                'service_host': 'gatewayhost{}'.format(i),
+                'service_port': 12345,
+                'service_ssl_flag': True,
+                'service_type': 'gateway:test',
+        } for i in range(gateways)]
+        self.gateway_roots = [
+            "https://{service_host}:{service_port}/".format(**gw)
+            for gw in self.gateways]
+        self.api_client = self.mock_keep_services(
+            count=disks, additional_services=self.gateways)
+        self.keepClient = arvados.KeepClient(api_client=self.api_client)
+
+    @mock.patch('pycurl.Curl')
+    def test_get_with_gateway_hint_first(self, MockCurl):
+        MockCurl.return_value = tutil.FakeCurl.make(
+            code=200, body='foo', headers={'Content-Length': 3})
+        self.mock_disks_and_gateways()
+        locator = 'acbd18db4cc2f85cedef654fccc4a4d8+3+K@' + self.gateways[0]['uuid']
+        self.assertEqual('foo', self.keepClient.get(locator))
+        self.assertEqual(self.gateway_roots[0]+locator,
+                         MockCurl.return_value.getopt(pycurl.URL))
+
+    @mock.patch('pycurl.Curl')
+    def test_get_with_gateway_hints_in_order(self, MockCurl):
+        gateways = 4
+        disks = 3
+        mocks = [
+            tutil.FakeCurl.make(code=404, body='')
+            for _ in range(gateways+disks)
+        ]
+        MockCurl.side_effect = tutil.queue_with(mocks)
+        self.mock_disks_and_gateways(gateways=gateways, disks=disks)
+        locator = '+'.join(['acbd18db4cc2f85cedef654fccc4a4d8+3'] +
+                           ['K@'+gw['uuid'] for gw in self.gateways])
+        with self.assertRaises(arvados.errors.NotFoundError):
+            self.keepClient.get(locator)
+        # Gateways are tried first, in the order given.
+        for i, root in enumerate(self.gateway_roots):
+            self.assertEqual(root+locator,
+                             mocks[i].getopt(pycurl.URL))
+        # Disk services are tried next.
+        for i in range(gateways, gateways+disks):
+            self.assertRegexpMatches(
+                mocks[i].getopt(pycurl.URL),
+                r'keep0x')
+
+    @mock.patch('pycurl.Curl')
+    def test_get_with_remote_proxy_hint(self, MockCurl):
+        MockCurl.return_value = tutil.FakeCurl.make(
+            code=200, body='foo', headers={'Content-Length': 3})
+        self.mock_disks_and_gateways()
+        locator = 'acbd18db4cc2f85cedef654fccc4a4d8+3+K@xyzzy'
+        self.assertEqual('foo', self.keepClient.get(locator))
+        self.assertEqual('https://keep.xyzzy.arvadosapi.com/'+locator,
+                         MockCurl.return_value.getopt(pycurl.URL))
+
 
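The gateway tests above exercise +K@ hints in block locators: a hint carrying a full service UUID routes the request to that gateway's advertised root first, while a bare cluster ID like "xyzzy" maps to https://keep.xyzzy.arvadosapi.com/. An illustrative parse of the two hint forms (not the SDK's code; distinguishing them by the dash is an assumption drawn from these fixtures):

    locator = 'acbd18db4cc2f85cedef654fccc4a4d8+3+K@zzzzz-bi6l4-gateway00000000'
    md5sum, size = locator.split('+')[:2]
    for hint in locator.split('+')[2:]:
        if not hint.startswith('K@'):
            continue
        name = hint[2:]
        if '-' in name:      # full gateway service UUID
            print('try the gateway service %s first' % name)
        else:                # remote cluster ID
            print('try https://keep.%s.arvadosapi.com/%s' % (name, locator))
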
 class KeepClientRetryTestMixin(object):
     # Testing with a local Keep store won't exercise the retry behavior.
@@ -528,14 +692,14 @@ class KeepClientRetryGetTestCase(KeepClientRetryTestMixin, unittest.TestCase):
     DEFAULT_EXPECT = KeepClientRetryTestMixin.TEST_DATA
     DEFAULT_EXCEPTION = arvados.errors.KeepReadError
     HINTED_LOCATOR = KeepClientRetryTestMixin.TEST_LOCATOR + '+K@xyzzy'
-    TEST_PATCHER = staticmethod(tutil.mock_get_responses)
+    TEST_PATCHER = staticmethod(tutil.mock_keep_responses)
 
     def run_method(self, locator=KeepClientRetryTestMixin.TEST_LOCATOR,
                    *args, **kwargs):
         return self.new_client().get(locator, *args, **kwargs)
 
     def test_specific_exception_when_not_found(self):
-        with tutil.mock_get_responses(self.DEFAULT_EXPECT, 404, 200):
+        with tutil.mock_keep_responses(self.DEFAULT_EXPECT, 404, 200):
             self.check_exception(arvados.errors.NotFoundError, num_retries=3)
 
     def test_general_exception_with_mixed_errors(self):
@@ -544,7 +708,7 @@ class KeepClientRetryGetTestCase(KeepClientRetryTestMixin, unittest.TestCase):
         # This test rigs up 50/50 disagreement between two servers, and
         # checks that it does not become a NotFoundError.
         client = self.new_client()
-        with tutil.mock_get_responses(self.DEFAULT_EXPECT, 404, 500):
+        with tutil.mock_keep_responses(self.DEFAULT_EXPECT, 404, 500):
             with self.assertRaises(arvados.errors.KeepReadError) as exc_check:
                 client.get(self.HINTED_LOCATOR)
             self.assertNotIsInstance(
@@ -552,17 +716,19 @@ class KeepClientRetryGetTestCase(KeepClientRetryTestMixin, unittest.TestCase):
                 "mixed errors raised NotFoundError")
 
     def test_hint_server_can_succeed_without_retries(self):
-        with tutil.mock_get_responses(self.DEFAULT_EXPECT, 404, 200, 500):
+        with tutil.mock_keep_responses(self.DEFAULT_EXPECT, 404, 200, 500):
             self.check_success(locator=self.HINTED_LOCATOR)
 
     def test_try_next_server_after_timeout(self):
-        with tutil.mock_get([
-                socket.timeout("timed out"),
-                tutil.fake_requests_response(200, self.DEFAULT_EXPECT)]):
+        with tutil.mock_keep_responses(
+                (socket.timeout("timed out"), 200),
+                (self.DEFAULT_EXPECT, 200)):
             self.check_success(locator=self.HINTED_LOCATOR)
 
     def test_retry_data_with_wrong_checksum(self):
-        with tutil.mock_get((tutil.fake_requests_response(200, s) for s in ['baddata', self.TEST_DATA])):
+        with tutil.mock_keep_responses(
+                ('baddata', 200),
+                (self.DEFAULT_EXPECT, 200)):
             self.check_success(locator=self.HINTED_LOCATOR)
 
 
@@ -570,12 +736,12 @@ class KeepClientRetryGetTestCase(KeepClientRetryTestMixin, unittest.TestCase):
 class KeepClientRetryPutTestCase(KeepClientRetryTestMixin, unittest.TestCase):
     DEFAULT_EXPECT = KeepClientRetryTestMixin.TEST_LOCATOR
     DEFAULT_EXCEPTION = arvados.errors.KeepWriteError
-    TEST_PATCHER = staticmethod(tutil.mock_put_responses)
+    TEST_PATCHER = staticmethod(tutil.mock_keep_responses)
 
     def run_method(self, data=KeepClientRetryTestMixin.TEST_DATA,
                    copies=1, *args, **kwargs):
         return self.new_client().put(data, copies, *args, **kwargs)
 
     def test_do_not_send_multiple_copies_to_same_server(self):
-        with tutil.mock_put_responses(self.DEFAULT_EXPECT, 200):
+        with tutil.mock_keep_responses(self.DEFAULT_EXPECT, 200):
             self.check_exception(copies=2, num_retries=3)
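
Throughout this file the per-verb patchers (tutil.mock_get, mock_put, mock_get_responses, mock_put_responses) collapse into a single tutil.mock_keep_responses, which takes a body (or per-response (body, status) tuples) followed by one status per simulated response. A generic sketch of that canned-response-queue pattern (illustrative only; the real helper fakes pycurl handles):

    import mock

    def serve_canned(cans):
        cans = list(cans)                 # (body_or_exception, status) in order
        def serve(*args, **kwargs):
            body, status = cans.pop(0)
            if isinstance(body, Exception):
                raise body
            return mock.Mock(status_code=status, content=body)
        return serve

    fake = mock.Mock(side_effect=serve_canned([(IOError('boom'), 0), ('ok', 200)]))
    try:
        fake('http://keep.example/')
    except IOError:
        pass
    print(fake('http://keep.example/').status_code)   # 200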
index 0c1110c5ceb10e5164d48db9c03bd2e74f8b9639..c41c42e762cd5e8f856444926716b0c274735bb5 100644 (file)
@@ -7,8 +7,6 @@ import arvados.errors as arv_error
 import arvados.retry as arv_retry
 import mock
 
-from arvados_testutil import fake_requests_response
-
 class RetryLoopTestMixin(object):
     @staticmethod
     def loop_success(result):
@@ -150,8 +148,7 @@ class RetryLoopBackoffTestCase(unittest.TestCase, RetryLoopTestMixin):
 class CheckHTTPResponseSuccessTestCase(unittest.TestCase):
     def results_map(self, *codes):
         for code in codes:
-            response = fake_requests_response(code, None)
-            yield code, arv_retry.check_http_response_success(response)
+            yield code, arv_retry.check_http_response_success(code)
 
     def check(assert_name):
         def check_method(self, expected, *codes):
@@ -204,8 +201,10 @@ class RetryMethodTestCase(unittest.TestCase):
             return (a, num_retries, z)
 
 
-    def test_positional_arg_passed(self):
-        self.assertEqual((3, 2, 0), self.Tester().check(3, 2))
+    def test_positional_arg_raises(self):
+        # unsupported use -- make sure we raise rather than ignore
+        with self.assertRaises(TypeError):
+            self.assertEqual((3, 2, 0), self.Tester().check(3, 2))
 
     def test_keyword_arg_passed(self):
         self.assertEqual((4, 3, 0), self.Tester().check(num_retries=3, a=4))
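
The rewritten test pins down @retry_method's contract: num_retries may only be passed by keyword, and a positional value must raise rather than be silently swallowed. A minimal decorator with that behavior (an assumed shape, not the SDK source):

    import functools

    def retry_method(orig_func):
        @functools.wraps(orig_func)
        def wrapper(self, *args, **kwargs):
            kwargs.setdefault('num_retries', self.num_retries)
            return orig_func(self, *args, **kwargs)
        return wrapper

    class Tester(object):
        num_retries = 0

        @retry_method
        def check(self, a, num_retries=None, z=0):
            return (a, num_retries, z)

    print(Tester().check(num_retries=3, a=4))   # (4, 3, 0)
    try:
        Tester().check(3, 2)                    # positional num_retries
    except TypeError as err:
        print(err)  # check() got multiple values for 'num_retries'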
index 15257fe31270e8d3614ba44e02637c3fbf7c3691..6c3bd61414173fb64fe9ef7b7b1b44dcc4af6d9d 100644 (file)
@@ -21,7 +21,7 @@ class StreamFileReaderTestCase(unittest.TestCase):
         return StreamFileReader(stream, [Range(1, 0, 3), Range(6, 3, 3), Range(11, 6, 3)],
                                 'count.txt')
 
-    def test_read_returns_first_block(self):
+    def test_read_block_crossing_behavior(self):
         # read() calls will be aligned on block boundaries - see #3663.
         sfile = self.make_count_reader()
         self.assertEqual('123', sfile.read(10))
@@ -48,7 +48,7 @@ class StreamFileReaderTestCase(unittest.TestCase):
         self.assertEqual('123456789', ''.join(sfile.readall()))
 
     def test_one_arg_seek(self):
-        self.test_relative_seek([])
+        self.test_absolute_seek([])
 
     def test_absolute_seek(self, args=[os.SEEK_SET]):
         sfile = self.make_count_reader()
@@ -199,47 +199,47 @@ class StreamRetryTestMixin(object):
 
     @tutil.skip_sleep
     def test_success_without_retries(self):
-        with tutil.mock_get_responses('bar', 200):
+        with tutil.mock_keep_responses('bar', 200):
             reader = self.reader_for('bar_file')
             self.assertEqual('bar', self.read_for_test(reader, 3))
 
     @tutil.skip_sleep
     def test_read_no_default_retry(self):
-        with tutil.mock_get_responses('', 500):
+        with tutil.mock_keep_responses('', 500):
             reader = self.reader_for('user_agreement')
             with self.assertRaises(arvados.errors.KeepReadError):
                 self.read_for_test(reader, 10)
 
     @tutil.skip_sleep
     def test_read_with_instance_retries(self):
-        with tutil.mock_get_responses('foo', 500, 200):
+        with tutil.mock_keep_responses('foo', 500, 200):
             reader = self.reader_for('foo_file', num_retries=3)
             self.assertEqual('foo', self.read_for_test(reader, 3))
 
     @tutil.skip_sleep
     def test_read_with_method_retries(self):
-        with tutil.mock_get_responses('foo', 500, 200):
+        with tutil.mock_keep_responses('foo', 500, 200):
             reader = self.reader_for('foo_file')
             self.assertEqual('foo',
                              self.read_for_test(reader, 3, num_retries=3))
 
     @tutil.skip_sleep
     def test_read_instance_retries_exhausted(self):
-        with tutil.mock_get_responses('bar', 500, 500, 500, 500, 200):
+        with tutil.mock_keep_responses('bar', 500, 500, 500, 500, 200):
             reader = self.reader_for('bar_file', num_retries=3)
             with self.assertRaises(arvados.errors.KeepReadError):
                 self.read_for_test(reader, 3)
 
     @tutil.skip_sleep
     def test_read_method_retries_exhausted(self):
-        with tutil.mock_get_responses('bar', 500, 500, 500, 500, 200):
+        with tutil.mock_keep_responses('bar', 500, 500, 500, 500, 200):
             reader = self.reader_for('bar_file')
             with self.assertRaises(arvados.errors.KeepReadError):
                 self.read_for_test(reader, 3, num_retries=3)
 
     @tutil.skip_sleep
     def test_method_retries_take_precedence(self):
-        with tutil.mock_get_responses('', 500, 500, 500, 200):
+        with tutil.mock_keep_responses('', 500, 500, 500, 200):
             reader = self.reader_for('user_agreement', num_retries=10)
             with self.assertRaises(arvados.errors.KeepReadError):
                 self.read_for_test(reader, 10, num_retries=1)
index f9e5d8ce40b2a462f10ec7cf710e146e0b795d0b..41739a92bd9396bf73c7562ce8db25dfb0e66750 100644 (file)
@@ -1,6 +1,8 @@
-import unittest
 import os
-import arvados.util
+import subprocess
+import unittest
+
+import arvados
 
 class MkdirDashPTest(unittest.TestCase):
     def setUp(self):
@@ -20,3 +22,15 @@ class MkdirDashPTest(unittest.TestCase):
         with open('./tmp/bar', 'wb') as f:
             f.write('bar')
         self.assertRaises(OSError, arvados.util.mkdir_dash_p, './tmp/bar')
+
+
+class RunCommandTestCase(unittest.TestCase):
+    def test_success(self):
+        stdout, stderr = arvados.util.run_command(['echo', 'test'],
+                                                  stderr=subprocess.PIPE)
+        self.assertEqual("test\n", stdout)
+        self.assertEqual("", stderr)
+
+    def test_failure(self):
+        with self.assertRaises(arvados.errors.CommandFailedError):
+            arvados.util.run_command(['false'])
index d879ebe1f8062c02d965bd9c845e5e00c57d1e76..37b644aaf1eed4e4b01e4e8e90f3a8606a2e7ed5 100644 (file)
@@ -1,14 +1,19 @@
-import Queue
-import run_test_server
-import unittest
 import arvados
 import arvados.events
+from datetime import datetime, timedelta, tzinfo
 import mock
+import Queue
+import run_test_server
 import threading
+import time
+import unittest
 
 class WebsocketTest(run_test_server.TestCaseWithServers):
     MAIN_SERVER = {}
 
+    TIME_PAST = time.time()-3600
+    TIME_FUTURE = time.time()+3600
+
     def setUp(self):
         self.ws = None
 
@@ -16,25 +21,102 @@ class WebsocketTest(run_test_server.TestCaseWithServers):
         if self.ws:
             self.ws.close()
         super(WebsocketTest, self).tearDown()
+        run_test_server.reset()
 
-    def _test_subscribe(self, poll_fallback, expect_type):
+    def _test_subscribe(self, poll_fallback, expect_type, start_time=None, expected=1):
         run_test_server.authorize_with('active')
-        events = Queue.Queue(3)
+        events = Queue.Queue(100)
+
+        # Create ancestor before subscribing.
+        # When listening with start_time in the past, this should also be retrieved.
+        # However, when start_time is omitted in subscribe, this should not be fetched.
+        ancestor = arvados.api('v1').humans().create(body={}).execute()
+
+        filters = [['object_uuid', 'is_a', 'arvados#human']]
+        if start_time:
+            filters.append(['created_at', '>=', start_time])
+
         self.ws = arvados.events.subscribe(
-            arvados.api('v1'), [['object_uuid', 'is_a', 'arvados#human']],
-            events.put, poll_fallback=poll_fallback)
+            arvados.api('v1'), filters,
+            events.put_nowait,
+            poll_fallback=poll_fallback,
+            last_log_id=(1 if start_time else None))
         self.assertIsInstance(self.ws, expect_type)
-        self.assertEqual(200, events.get(True, 10)['status'])
+        self.assertEqual(200, events.get(True, 5)['status'])
         human = arvados.api('v1').humans().create(body={}).execute()
-        self.assertEqual(human['uuid'], events.get(True, 10)['object_uuid'])
-        self.assertTrue(events.empty(), "got more events than expected")
+
+        log_object_uuids = []
+        for i in range(0, expected):
+            log_object_uuids.append(events.get(True, 5)['object_uuid'])
+
+        if expected > 0:
+            self.assertIn(human['uuid'], log_object_uuids)
+
+        if expected > 1:
+            self.assertIn(ancestor['uuid'], log_object_uuids)
+
+        with self.assertRaises(Queue.Empty):
+            # assertEqual just serves to show us what unexpected thing
+            # comes out of the queue when the assertRaises fails; when
+            # the test passes, this assertEqual doesn't get called.
+            self.assertEqual(events.get(True, 2), None)
 
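_test_subscribe above shows the replay pattern these new tests rely on: filter on created_at and pass last_log_id so the server backfills older matching log entries before streaming live events. A condensed usage sketch (assumes a configured API host and token, as in these integration tests):

    import Queue

    import arvados
    import arvados.events

    events = Queue.Queue(100)
    ws = arvados.events.subscribe(
        arvados.api('v1'),
        [['object_uuid', 'is_a', 'arvados#human'],
         ['created_at', '>=', '2015-01-01T00:00:00Z']],
        events.put_nowait,
        last_log_id=1)          # replay from the start of the log
    print(events.get(True, 5))  # first message reports subscription status
    ws.close()
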
     def test_subscribe_websocket(self):
         self._test_subscribe(
-            poll_fallback=False, expect_type=arvados.events.EventClient)
+            poll_fallback=False, expect_type=arvados.events.EventClient, expected=1)
 
     @mock.patch('arvados.events.EventClient.__init__')
     def test_subscribe_poll(self, event_client_constr):
         event_client_constr.side_effect = Exception('All is well')
         self._test_subscribe(
-            poll_fallback=1, expect_type=arvados.events.PollClient)
+            poll_fallback=0.25, expect_type=arvados.events.PollClient, expected=1)
+
+    def test_subscribe_websocket_with_start_time_past(self):
+        self._test_subscribe(
+            poll_fallback=False, expect_type=arvados.events.EventClient,
+            start_time=self.localiso(self.TIME_PAST),
+            expected=2)
+
+    @mock.patch('arvados.events.EventClient.__init__')
+    def test_subscribe_poll_with_start_time_past(self, event_client_constr):
+        event_client_constr.side_effect = Exception('All is well')
+        self._test_subscribe(
+            poll_fallback=0.25, expect_type=arvados.events.PollClient,
+            start_time=self.localiso(self.TIME_PAST),
+            expected=2)
+
+    def test_subscribe_websocket_with_start_time_future(self):
+        self._test_subscribe(
+            poll_fallback=False, expect_type=arvados.events.EventClient,
+            start_time=self.localiso(self.TIME_FUTURE),
+            expected=0)
+
+    @mock.patch('arvados.events.EventClient.__init__')
+    def test_subscribe_poll_with_start_time_future(self, event_client_constr):
+        event_client_constr.side_effect = Exception('All is well')
+        self._test_subscribe(
+            poll_fallback=0.25, expect_type=arvados.events.PollClient,
+            start_time=self.localiso(self.TIME_FUTURE),
+            expected=0)
+
+    def test_subscribe_websocket_with_start_time_past_utc(self):
+        self._test_subscribe(
+            poll_fallback=False, expect_type=arvados.events.EventClient,
+            start_time=self.utciso(self.TIME_PAST),
+            expected=2)
+
+    def test_subscribe_websocket_with_start_time_future_utc(self):
+        self._test_subscribe(
+            poll_fallback=False, expect_type=arvados.events.EventClient,
+            start_time=self.utciso(self.TIME_FUTURE),
+            expected=0)
+
+    def utciso(self, t):
+        return time.strftime('%Y-%m-%dT%H:%M:%SZ', time.gmtime(t))
+
+    def localiso(self, t):
+        return time.strftime('%Y-%m-%dT%H:%M:%S', time.localtime(t)) + self.isotz(-time.timezone/60)
+
+    def isotz(self, offset):
+        """Convert minutes-east-of-UTC to ISO8601 time zone designator"""
+        return '{:+03d}{:02d}'.format(offset/60, offset%60)
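Worked examples of that arithmetic (note the helpers rely on Python 2 floor division, written as // below so the results hold on either version, and localiso always applies time.timezone, ignoring DST):

    def isotz(offset):
        # 330 -> '+0530'; -300 -> '-0500'.
        # Caveat: floor division makes -210 render as '-0430', not '-0330'.
        return '{:+03d}{:02d}'.format(offset // 60, offset % 60)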
index e957b67038a0a351ddabc25c0f1afa64f406a1dd..e092ed47a3eabeb83ea5aa16b188cb577e53716e 100644 (file)
@@ -16,7 +16,7 @@ Gem::Specification.new do |s|
   s.email       = 'gem-dev@curoverse.com'
   s.licenses    = ['Apache License, Version 2.0']
   s.files       = ["lib/arvados.rb", "lib/arvados/google_api_client.rb",
-                   "lib/arvados/keep.rb"]
+                   "lib/arvados/collection.rb", "lib/arvados/keep.rb"]
   s.required_ruby_version = '>= 2.1.0'
   s.add_dependency('google-api-client', '~> 0.6.3', '>= 0.6.3')
   s.add_dependency('activesupport', '>= 3.2.13')
index ec0f443daabaea6ce1062bb106b44844851a1387..07b751908f7da26b93fd5321fe8a5c192872a8d6 100644 (file)
@@ -14,8 +14,13 @@ module Arv
         loc_list = LocatorList.new(locators)
         file_specs.map { |s| manifest.split_file_token(s) }.
             each do |file_start, file_len, file_path|
-          @root.file_at(normalize_path(stream_root, file_path)).
-            add_segment(loc_list.segment(file_start, file_len))
+          begin
+            @root.file_at(normalize_path(stream_root, file_path)).
+              add_segment(loc_list.segment(file_start, file_len))
+          rescue Errno::ENOTDIR, Errno::EISDIR => error
+            raise ArgumentError.new("%p is both a stream and file" %
+                                    error.to_s.partition(" - ").last)
+          end
         end
       end
     end
@@ -43,6 +48,19 @@ module Arv
       copy(:merge, source.chomp("/"), target, source_collection, opts)
     end
 
+    def each_file_path(&block)
+      @root.each_file_path(&block)
+    end
+
+    def exist?(path)
+      begin
+        substream, item = find(path)
+        not (substream.leaf? or substream[item].nil?)
+      rescue Errno::ENOENT, Errno::ENOTDIR
+        false
+      end
+    end
+
     def rename(source, target)
       copy(:add_copy, source, target) { rm_r(source) }
     end
@@ -88,13 +106,19 @@ module Arv
       # is found and can be copied.
       source_collection = self if source_collection.nil?
       src_stream, src_tail = source_collection.find(source)
-      dst_stream, dst_tail = find(target)
+      dst_stream_path, _, dst_tail = normalize_path(target).rpartition("/")
+      if dst_stream_path.empty?
+        dst_stream, dst_tail = @root.find(dst_tail)
+        dst_tail ||= src_tail
+      else
+        dst_stream = @root.stream_at(dst_stream_path)
+        dst_tail = src_tail if dst_tail.empty?
+      end
       if (source_collection.equal?(self) and
           (src_stream.path == dst_stream.path) and (src_tail == dst_tail))
         return self
       end
       src_item = src_stream[src_tail]
-      dst_tail ||= src_tail
       check_method = "check_can_#{copy_method}".to_sym
       target_name = nil
       if opts.fetch(:descend_target, true)
@@ -272,6 +296,17 @@ module Arv
         end
       end
 
+      def each_file_path
+        return to_enum(__method__) unless block_given?
+        items.each_value do |item|
+          if item.file?
+            yield item.path
+          else
+            item.each_file_path { |path| yield path }
+          end
+        end
+      end
+
       def find(find_path)
         # Given a POSIX-style path, return the CollectionStream that
         # contains the object at that path, and the name of the object
@@ -283,7 +318,7 @@ module Arv
 
       def stream_at(find_path)
         key, rest = find_path.split("/", 2)
-        next_stream = get_or_new(key, CollectionStream)
+        next_stream = get_or_new(key, CollectionStream, Errno::ENOTDIR)
         if rest.nil?
           next_stream
         else
@@ -294,7 +329,7 @@ module Arv
       def file_at(find_path)
         stream_path, _, file_name = find_path.rpartition("/")
         if stream_path.empty?
-          get_or_new(file_name, CollectionFile)
+          get_or_new(file_name, CollectionFile, Errno::EISDIR)
         else
           stream_at(stream_path).file_at(file_name)
         end
@@ -377,17 +412,15 @@ module Arv
         items[key] = item
       end
 
-      def get_or_new(key, klass)
+      def get_or_new(key, klass, err_class)
         # Return the collection item at `key` and ensure that it's a `klass`.
         # If `key` does not exist, create a new `klass` there.
-        # If the value for `key` is not a `klass`, raise an ArgumentError.
+        # If the value for `key` is not a `klass`, raise an `err_class`.
         item = items[key]
         if item.nil?
           self[key] = klass.new("#{path}/#{key}")
         elsif not item.is_a?(klass)
-          raise ArgumentError.
-            new("in stream %p, %p is a %s, not a %s" %
-                [path, key, items[key].class.human_name, klass.human_name])
+          raise err_class.new(item.path)
         else
           item
         end
index e4f62083b0d5568d757d8ae05dc043170e10f6ca..3c6b26b765f59c4938465aaa7dcc589187fa7722 100644 (file)
@@ -18,7 +18,7 @@ module Keep
     #   sign-timestamp ::= <8 lowercase hex digits>
     attr_reader :hash, :hints, :size
 
-    LOCATOR_REGEXP = /^([[:xdigit:]]{32})(\+([[:digit:]]+))?(\+([[:upper:]][[:alnum:]+@_-]*))?$/
+    LOCATOR_REGEXP = /^([[:xdigit:]]{32})(\+([[:digit:]]+))?((\+([[:upper:]][[:alnum:]@_-]*))+)?\z/
 
     def initialize(hasharg, sizearg, hintarg)
       @hash = hasharg
@@ -97,6 +97,9 @@ module Keep
   end
 
   class Manifest
+    STRICT_STREAM_TOKEN_REGEXP = /^(\.)(\/[^\/\s]+)*$/
+    STRICT_FILE_TOKEN_REGEXP = /^[[:digit:]]+:[[:digit:]]+:([^\s\/]+(\/[^\s\/]+)*)$/
+
     # Class to parse a manifest text and provide common views of that data.
     def initialize(manifest_text)
       @text = manifest_text
@@ -154,7 +157,16 @@ module Keep
             stream_name = unescape token
           elsif in_file_tokens or not Locator.valid? token
             in_file_tokens = true
-            yield [stream_name] + split_file_token(token)
+
+            file_tokens = split_file_token(token)
+            stream_name_adjuster = ''
+            if file_tokens[2].include?('/')                # '/' in filename
+              parts = file_tokens[2].rpartition('/')
+              stream_name_adjuster = parts[1] + parts[0]   # /dir_parts
+              file_tokens[2] = parts[2]
+            end
+
+            yield [stream_name + stream_name_adjuster] + file_tokens
           end
         end
       end
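
The adjuster above folds the directory part of a '/'-bearing file name into the stream name, so a legacy manifest like ". ... 0:3:dir1/dir2/file1" is reported under stream "./dir1/dir2" with basename "file1". The same adjustment restated in Python (illustration only, mirroring the Ruby rpartition logic):

    def adjust(stream_name, file_name):
        dirpart, sep, base = file_name.rpartition('/')
        return (stream_name + sep + dirpart, base)

    print(adjust('.', 'dir1/dir2/file1'))   # ('./dir1/dir2', 'file1')
    print(adjust('.', 'file1'))             # ('.', 'file1')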
@@ -215,5 +227,63 @@ module Keep
       end
       false
     end
+
+    # Verify that a given manifest is valid according to
+    # https://arvados.org/projects/arvados/wiki/Keep_manifest_format
+    def self.validate! manifest
+      raise ArgumentError.new "No manifest found" if !manifest
+
+      return true if manifest.empty?
+
+      raise ArgumentError.new "Invalid manifest: does not end with newline" if !manifest.end_with?("\n")
+      line_count = 0
+      manifest.each_line do |line|
+        line_count += 1
+
+        words = line[0..-2].split(/ /)
+        raise ArgumentError.new "Manifest invalid for stream #{line_count}: missing stream name" if words.empty?
+
+        count = 0
+
+        word = words.shift
+        count += 1 if word =~ STRICT_STREAM_TOKEN_REGEXP and word !~ /\/\.\.?(\/|$)/
+        raise ArgumentError.new "Manifest invalid for stream #{line_count}: missing or invalid stream name #{word.inspect if word}" if count != 1
+
+        count = 0
+        word = words.shift
+        while word =~ Locator::LOCATOR_REGEXP
+          word = words.shift
+          count += 1
+        end
+        raise ArgumentError.new "Manifest invalid for stream #{line_count}: missing or invalid locator #{word.inspect if word}" if count == 0
+
+        count = 0
+        while word =~ STRICT_FILE_TOKEN_REGEXP and ($~[1].split('/') & ['..','.']).empty?
+          word = words.shift
+          count += 1
+        end
+
+        if word
+          raise ArgumentError.new "Manifest invalid for stream #{line_count}: invalid file token #{word.inspect}"
+        elsif count == 0
+          raise ArgumentError.new "Manifest invalid for stream #{line_count}: no file tokens"
+        end
+
+        # Ruby's split() method silently drops trailing empty tokens
+        # (which are not allowed by the manifest format) so we have to
+        # check trailing spaces manually.
+        raise ArgumentError.new "Manifest invalid for stream #{line_count}: trailing space" if line.end_with? " \n"
+      end
+      true
+    end
+
+    def self.valid? manifest
+      begin
+        validate! manifest
+        true
+      rescue ArgumentError
+        false
+      end
+    end
   end
 end
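validate! scans each stream line in three phases: one stream name, then at least one locator, then only file tokens. The same shape restated in Python (a sketch mirroring the Ruby regexps; it omits the '.'/'..' path-segment checks and the detailed error messages):

    import re

    STREAM = re.compile(r'^\.(/[^/\s]+)*$')
    LOC = re.compile(r'^[0-9a-fA-F]{32}(\+\d+)?(\+[A-Z][A-Za-z0-9@_-]*)*$')
    FILETOK = re.compile(r'^\d+:\d+:[^\s/]+(/[^\s/]+)*$')

    def valid_line(line):
        words = line.rstrip('\n').split(' ')
        if not STREAM.match(words[0]):
            return False
        i = 1
        while i < len(words) and LOC.match(words[i]):
            i += 1
        if i == 1:                       # need at least one locator
            return False
        rest = words[i:]
        return bool(rest) and all(FILETOK.match(w) for w in rest)

    print(valid_line('. d41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt\n'))  # True
    print(valid_line('. 0:0:foo.txt\n'))                                     # False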
index 52d7377f80c7ae41889353fe0a7507e950ff43c5..6bf6a9e613c14a52a3ab3649279a8a3cfd878f9c 100644 (file)
@@ -54,6 +54,11 @@ module SDKFixtures
   NONNORMALIZED_MANIFEST =
     ["./dir2 #{random_block} 0:0:z 0:0:y 0:0:x",
      "./dir1 #{random_block} 0:0:p 0:0:o 0:0:n\n"].join("\n")
+  MANIFEST_WITH_DIRS_IN_FILENAMES =
+    [". #{random_block(10)} 0:3:file1 3:3:dir1/file1 6:3:dir1/dir2/file1\n"].join("")
+  MULTILEVEL_MANIFEST_WITH_DIRS_IN_FILENAMES =
+    [". #{random_block(10)} 0:3:file1 3:3:dir1/file1 6:4:dir1/dir2/file1\n",
+     "./dir1 #{random_block(10)} 0:3:file1 3:7:dir2/file1\n"].join("")
 
   ### Non-tree manifests
   # These manifests follow the spec, but they express a structure that can't
index 3dd1ab39712857b54552a5d382e1ed8f6d4939ee..e2a39bc5edbda8705e0434fa9738c5192b325938 100644 (file)
@@ -223,13 +223,17 @@ class CollectionTest < Minitest::Test
     assert_equal(expected.join(""), coll.manifest_text)
   end
 
-  def test_copy_stream_over_file_raises_ENOTDIR
+  def test_copy_stream_over_file_raises_ENOTDIR(source="./s1", target="./f2")
     coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
     assert_raises(Errno::ENOTDIR) do
-      coll.cp_r("./s1", "./f2")
+      coll.cp_r(source, target)
     end
   end
 
+  def test_copy_file_under_file_raises_ENOTDIR
+    test_copy_stream_over_file_raises_ENOTDIR("./f1", "./f2/newfile")
+  end
+
   def test_copy_stream_over_nonempty_stream_merges_and_overwrites
     blocks = random_blocks(3, 9)
     manifest_a =
@@ -323,6 +327,20 @@ class CollectionTest < Minitest::Test
     assert_equal(expect_lines.join(""), coll.manifest_text)
   end
 
+  def test_copy_file_into_new_stream_with_implicit_filename
+    coll = Arv::Collection.new(SIMPLEST_MANIFEST)
+    coll.cp_r("./simple.txt", "./new/")
+    assert_equal(SIMPLEST_MANIFEST + SIMPLEST_MANIFEST.sub(". ", "./new "),
+                 coll.manifest_text)
+  end
+
+  def test_copy_file_into_new_stream_with_explicit_filename
+    coll = Arv::Collection.new(SIMPLEST_MANIFEST)
+    coll.cp_r("./simple.txt", "./new/newfile.txt")
+    new_line = SIMPLEST_MANIFEST.sub(". ", "./new ").sub(":simple", ":newfile")
+    assert_equal(SIMPLEST_MANIFEST + new_line, coll.manifest_text)
+  end
+
   def test_copy_stream_contents_into_root
     coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
     coll.cp_r("./s1/", ".")
@@ -374,6 +392,71 @@ class CollectionTest < Minitest::Test
     test_copy_empty_source_path_raises_ArgumentError(".", "")
   end
 
+  ### .each_file_path
+
+  def test_each_file_path
+    coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
+    if block_given?
+      result = yield(coll)
+    else
+      result = []
+      coll.each_file_path { |path| result << path }
+    end
+    assert_equal(["./f1", "./f2", "./s1/f1", "./s1/f3"], result.sort)
+  end
+
+  def test_each_file_path_without_block
+    test_each_file_path { |coll| coll.each_file_path.to_a }
+  end
+
+  def test_each_file_path_empty_collection
+    assert_empty(Arv::Collection.new.each_file_path.to_a)
+  end
+
+  def test_each_file_path_after_collection_emptied
+    coll = Arv::Collection.new(SIMPLEST_MANIFEST)
+    coll.rm("simple.txt")
+    assert_empty(coll.each_file_path.to_a)
+  end
+
+  def test_each_file_path_deduplicates_manifest_listings
+    coll = Arv::Collection.new(MULTIBLOCK_FILE_MANIFEST)
+    assert_equal(["./repfile", "./s1/repfile", "./s1/uniqfile",
+                  "./uniqfile", "./uniqfile2"],
+                 coll.each_file_path.to_a.sort)
+  end
+
+  ### .exist?
+
+  def test_exist(test_method=:assert, path="f2")
+    coll = Arv::Collection.new(TWO_BY_TWO_MANIFEST_S)
+    send(test_method, coll.exist?(path))
+  end
+
+  def test_file_not_exist
+    test_exist(:refute, "f3")
+  end
+
+  def test_stream_exist
+    test_exist(:assert, "s1")
+  end
+
+  def test_file_inside_stream_exist
+    test_exist(:assert, "s1/f1")
+  end
+
+  def test_path_inside_stream_not_exist
+    test_exist(:refute, "s1/f2")
+  end
+
+  def test_path_under_file_not_exist
+    test_exist(:refute, "f2/nonexistent")
+  end
+
+  def test_deep_substreams_not_exist
+    test_exist(:refute, "a/b/c/d/e/f/g")
+  end
+
   ### .rename
 
   def test_simple_file_rename
index f1f1a530ceddbf35a9d5cd3066acc180eb4f254e..5ed9cfc2b186b6e2122d41a879603054962bd0d3 100644 (file)
@@ -208,4 +208,222 @@ class ManifestTest < Minitest::Test
       assert !file_name.empty?, "empty file_name in #{name} fixture"
     end
   end
+
+  def test_collection_with_dirs_in_filenames
+    manifest = Keep::Manifest.new(MANIFEST_WITH_DIRS_IN_FILENAMES)
+
+    seen = Hash.new { |this, key| this[key] = [] }
+
+    manifest.files.each do |stream, basename, size|
+      refute(seen[stream].include?(basename), "each_file repeated #{stream}/#{basename}")
+      assert_equal(3, size, "wrong size for #{stream}/#{basename}")
+      seen[stream] << basename
+    end
+
+    assert_equal(%w(. ./dir1 ./dir1/dir2), seen.keys)
+
+    seen.each_pair do |stream, basenames|
+      assert_equal(%w(file1), basenames.sort, "wrong file list for #{stream}")
+    end
+  end
+
+  def test_multilevel_collection_with_dirs_in_filenames
+    manifest = Keep::Manifest.new(MULTILEVEL_MANIFEST_WITH_DIRS_IN_FILENAMES)
+
+    seen = Hash.new { |this, key| this[key] = [] }
+    expected_sizes = {'.' => 3, './dir1' => 6, './dir1/dir2' => 11}
+
+    manifest.files.each do |stream, basename, size|
+      refute(seen[stream].include?(basename), "each_file repeated #{stream}/#{basename}")
+      assert_equal(expected_sizes[stream], size, "wrong size for #{stream}/#{basename}")
+      seen[stream] << basename
+    end
+
+    assert_equal(%w(. ./dir1 ./dir1/dir2), seen.keys)
+
+    seen.each_pair do |stream, basenames|
+      assert_equal(%w(file1), basenames.sort, "wrong file list for #{stream}")
+    end
+  end
+
+  [[false, nil],
+   [false, '+0'],
+   [false, 'd41d8cd98f00b204e9800998ecf8427'],
+   [false, 'd41d8cd98f00b204e9800998ecf8427+0'],
+   [false, 'd41d8cd98f00b204e9800998ecf8427e0'],
+   [false, 'd41d8cd98f00b204e9800998ecf8427e0+0'],
+   [false, 'd41d8cd98f00b204e9800998ecf8427e+0 '],
+   [false, "d41d8cd98f00b204e9800998ecf8427e+0\n"],
+   [false, ' d41d8cd98f00b204e9800998ecf8427e+0'],
+   [false, 'd41d8cd98f00b204e9800998ecf8427e+K+0'],
+   [false, 'd41d8cd98f00b204e9800998ecf8427e+0+0'],
+   [false, 'd41d8cd98f00b204e9800998ecf8427e++'],
+   [false, 'd41d8cd98f00b204e9800998ecf8427e+0+K+'],
+   [false, 'd41d8cd98f00b204e9800998ecf8427e+0++K'],
+   [false, 'd41d8cd98f00b204e9800998ecf8427e+0+K++'],
+   [false, 'd41d8cd98f00b204e9800998ecf8427e+0+K++Z'],
+   [true, 'd41d8cd98f00b204e9800998ecf8427e', nil,nil,nil],
+   [true, 'd41d8cd98f00b204e9800998ecf8427e+0', '+0','0',nil],
+   [true, 'd41d8cd98f00b204e9800998ecf8427e+0+Fizz+Buzz','+0','0','+Fizz+Buzz'],
+   [true, 'd41d8cd98f00b204e9800998ecf8427e+Fizz+Buzz', nil,nil,'+Fizz+Buzz'],
+   [true, 'd41d8cd98f00b204e9800998ecf8427e+0+Z', '+0','0','+Z'],
+   [true, 'd41d8cd98f00b204e9800998ecf8427e+Z', nil,nil,'+Z'],
+  ].each do |ok, locator, match2, match3, match4|
+    define_method "test_LOCATOR_REGEXP_on_#{locator.inspect}" do
+      match = Keep::Locator::LOCATOR_REGEXP.match locator
+      assert_equal ok, !!match
+      if ok
+        assert_equal match2, match[2]
+        assert_equal match3, match[3]
+        assert_equal match4, match[4]
+      end
+    end
+  end
+
+  [
+    [false, nil, "No manifest found"],
+    [true, ""],
+    [false, " ", "Invalid manifest: does not end with newline"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e a41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt\n"], # 2 locators
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo/bar.txt\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:.foo.txt\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:.foo\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:...\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:.../.foo./.../bar\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo/...\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo/.../bar\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo/.bar/baz.txt\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo/bar./baz.txt\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 000000000000000000000000000000:0777:foo.txt\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:0:0\n"],
+    [true, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:\\040\n"],
+    [true, ". 00000000000000000000000000000000+0 0:0:0\n"],
+    [true, ". 00000000000000000000000000000000+0 0:0:d41d8cd98f00b204e9800998ecf8427e+0+Ad41d8cd98f00b204e9800998ecf8427e00000000@ffffffff\n"],
+    [false, '. d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt',
+      "Invalid manifest: does not end with newline"],
+    [false, "abc d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
+      "invalid stream name \"abc\""],
+    [false, "abc/./foo d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
+      "invalid stream name \"abc/./foo\""],
+    [false, "./abc/../foo d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
+      "invalid stream name \"./abc/../foo\""],
+    [false, "./abc/. d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
+      "invalid stream name \"./abc/.\""],
+    [false, "./abc/.. d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
+      "invalid stream name \"./abc/..\""],
+    [false, "./abc/./foo d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
+      "invalid stream name \"./abc/./foo\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:.\n",
+      "invalid file token \"0:0:.\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:..\n",
+      "invalid file token \"0:0:..\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:./abc.txt\n",
+      "invalid file token \"0:0:./abc.txt\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:../abc.txt\n",
+      "invalid file token \"0:0:../abc.txt\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt/.\n",
+      "invalid file token \"0:0:abc.txt/.\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt/..\n",
+      "invalid file token \"0:0:abc.txt/..\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:a/./bc.txt\n",
+      "invalid file token \"0:0:a/./bc.txt\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e 0:0:a/../bc.txt\n",
+      "invalid file token \"0:0:a/../bc.txt\""],
+    [false, "./abc/./foo d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt\n",
+      "invalid stream name \"./abc/./foo\""],
+    [false, "d41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt\n",
+      "invalid stream name \"d41d8cd98f00b204e9800998ecf8427e+0\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427 0:0:abc.txt\n",
+      "invalid locator \"d41d8cd98f00b204e9800998ecf8427\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e\n",
+      "Manifest invalid for stream 1: no file tokens"],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt\n/dir1 d41d8cd98f00b204e9800998ecf842 0:0:abc.txt\n",
+      "Manifest invalid for stream 2: missing or invalid stream name \"/dir1\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt\n./dir1 d41d8cd98f00b204e9800998ecf842 0:0:abc.txt\n",
+      "Manifest invalid for stream 2: missing or invalid locator \"d41d8cd98f00b204e9800998ecf842\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt\n./dir1 a41d8cd98f00b204e9800998ecf8427e+0 abc.txt\n",
+      "Manifest invalid for stream 2: invalid file token \"abc.txt\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt\n./dir1 a41d8cd98f00b204e9800998ecf8427e+0 0:abc.txt\n",
+      "Manifest invalid for stream 2: invalid file token \"0:abc.txt\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt\n./dir1 a41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt xyz.txt\n",
+      "Manifest invalid for stream 2: invalid file token \"xyz.txt\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt d41d8cd98f00b204e9800998ecf8427e+0\n",
+      "Manifest invalid for stream 1: invalid file token \"d41d8cd98f00b204e9800998ecf8427e+0\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:\n",
+      "Manifest invalid for stream 1: invalid file token \"0:0:\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0\n",
+      "Manifest invalid for stream 1: no file tokens"],
+    [false, ". 0:0:foo.txt d41d8cd98f00b204e9800998ecf8427e+0\n",
+      "Manifest invalid for stream 1: missing or invalid locator \"0:0:foo.txt\""],
+    [false, ". 0:0:foo.txt\n",
+      "Manifest invalid for stream 1: missing or invalid locator \"0:0:foo.txt\""],
+    [false, ".\n", "Manifest invalid for stream 1: missing or invalid locator"],
+    [false, ".", "Invalid manifest: does not end with newline"],
+    [false, ". \n", "Manifest invalid for stream 1: missing or invalid locator"],
+    [false, ".  \n", "Manifest invalid for stream 1: missing or invalid locator"],
+    [false, " . d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+      "Manifest invalid for stream 1: missing or invalid stream name"],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt \n",
+      "stream 1: trailing space"],
+    # TAB and other tricky whitespace characters:
+    [false, "\v. d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+      "Manifest invalid for stream 1: missing or invalid stream name \"\\v."],
+    [false, "./foo\vbar d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+      "Manifest invalid for stream 1: missing or invalid stream name \"./foo\\vbar"],
+    [false, "\t. d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+      "Manifest invalid for stream 1: missing or invalid stream name \"\\t"],
+    [false, ".\td41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+      "Manifest invalid for stream 1: missing or invalid stream name \".\\t"],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\t\n",
+      "stream 1: invalid file token \"0:0:foo.txt\\t\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0\t 0:0:foo.txt\n",
+      "stream 1: missing or invalid locator \"d41d8cd98f00b204e9800998ecf8427e+0\\t\""],
+    [false, "./foo\tbar d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+      "stream 1: missing or invalid stream name \"./foo\\tbar\""],
+    # other whitespace errors:
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0  0:0:foo.txt\n",
+      "Manifest invalid for stream 1: invalid file token \"\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n \n",
+      "Manifest invalid for stream 2: missing stream name"],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n\n",
+      "Manifest invalid for stream 2: missing stream name"],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n ",
+      "Invalid manifest: does not end with newline"],
+    [false, "\n. d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+      "Manifest invalid for stream 1: missing stream name"],
+    [false, " \n. d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+      "Manifest invalid for stream 1: missing stream name"],
+    # empty file and stream name components:
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:/foo.txt\n",
+      "Manifest invalid for stream 1: invalid file token \"0:0:/foo.txt\""],
+    [false, "./ d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+      "Manifest invalid for stream 1: missing or invalid stream name \"./\""],
+    [false, ".//foo d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+      "Manifest invalid for stream 1: missing or invalid stream name \".//foo\""],
+    [false, "./foo/ d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+      "Manifest invalid for stream 1: missing or invalid stream name \"./foo/\""],
+    [false, "./foo//bar d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n",
+      "Manifest invalid for stream 1: missing or invalid stream name \"./foo//bar\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo//bar.txt\n",
+      "Manifest invalid for stream 1: invalid file token \"0:0:foo//bar.txt\""],
+    [false, ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo/\n",
+      "Manifest invalid for stream 1: invalid file token \"0:0:foo/\""],
+  ].each do |ok, manifest, expected_error=nil|
+    define_method "test_validate manifest #{manifest.inspect}" do
+      assert_equal ok, Keep::Manifest.valid?(manifest)
+      if ok
+        assert Keep::Manifest.validate! manifest
+      else
+        begin
+          Keep::Manifest.validate! manifest
+        rescue ArgumentError => e
+          msg = e.message
+        end
+        refute_nil msg, "Expected ArgumentError"
+        assert msg.include?(expected_error), "Did not find expected error message. Expected: #{expected_error}; Actual: #{msg}"
+      end
+    end
+  end
 end
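
For reference, a minimal sketch (not part of the diff) of how callers exercise the two validators covered by the table above; the manifest literals are taken from the test cases:

    require 'arvados/keep'

    Keep::Manifest.valid?(". d41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt\n")
    # => true (valid? never raises)

    begin
      Keep::Manifest.validate! ". d41d8cd98f00b204e9800998ecf8427e 0:0:abc.txt"
    rescue ArgumentError => e
      e.message  # => "Invalid manifest: does not end with newline"
    end
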
index 70f67d5d76b10ecd5798c021045398372c4497a9..8f1b687a9cad9a01e593b08f1e34cee2d4db8e53 100644 (file)
@@ -14,6 +14,7 @@ group :test, :development do
   # still mandatory.
   gem 'simplecov', '~> 0.7.1', require: false
   gem 'simplecov-rcov', require: false
+  gem 'mocha', require: false
 end
 
 # This might not be needed in :test and :development, but we load it
@@ -72,7 +73,7 @@ gem 'faye-websocket'
 
 gem 'themes_for_rails'
 
-gem 'arvados', '>= 0.1.20150210011250'
+gem 'arvados', '>= 0.1.20150615153458'
 gem 'arvados-cli', '>= 0.1.20150128223752'
 
 # pg_power lets us use partial indexes in schema.rb in Rails 3
index a6a8326eeb7be74ea0f54cfcf296b52c69d3e51d..d671182a57c749b65bd9f7c93d2db215bc762f16 100644 (file)
@@ -32,10 +32,10 @@ GEM
       activemodel (>= 3.0.0)
       activesupport (>= 3.0.0)
       rack (>= 1.1.0)
-    addressable (2.3.7)
+    addressable (2.3.8)
     andand (1.3.3)
     arel (3.0.3)
-    arvados (0.1.20150210011250)
+    arvados (0.1.20150615153458)
       activesupport (>= 3.2.13)
       andand (~> 1.3, >= 1.3.3)
       google-api-client (~> 0.6.3, >= 0.6.3)
@@ -44,7 +44,7 @@ GEM
     arvados-cli (0.1.20150205181653)
       activesupport (~> 3.2, >= 3.2.13)
       andand (~> 1.3, >= 1.3.3)
-      arvados (~> 0.1, >= 0.1.20150128223554)
+      arvados (~> 0.1, >= 0.1.20150615153458)
       curb (~> 0.8)
       google-api-client (~> 0.6.3, >= 0.6.3)
       json (~> 1.7, >= 1.7.7)
@@ -105,7 +105,7 @@ GEM
     jquery-rails (3.1.0)
       railties (>= 3.0, < 5.0)
       thor (>= 0.14, < 2.0)
-    json (1.8.2)
+    json (1.8.3)
     jwt (0.1.13)
       multi_json (>= 1.5)
     launchy (2.4.3)
@@ -114,8 +114,11 @@ GEM
     mail (2.5.4)
       mime-types (~> 1.16)
       treetop (~> 1.4.8)
+    metaclass (0.0.4)
     mime-types (1.25.1)
-    multi_json (1.10.1)
+    mocha (1.1.0)
+      metaclass (~> 0.0.1)
+    multi_json (1.11.1)
     multipart-post (1.2.0)
     net-scp (1.2.0)
       net-ssh (>= 2.6.5)
@@ -224,7 +227,7 @@ PLATFORMS
 DEPENDENCIES
   acts_as_api
   andand
-  arvados (>= 0.1.20150210011250)
+  arvados (>= 0.1.20150615153458)
   arvados-cli (>= 0.1.20150128223752)
   coffee-rails (~> 3.2.0)
   database_cleaner
@@ -232,6 +235,7 @@ DEPENDENCIES
   faye-websocket
   google-api-client (~> 0.6.3)
   jquery-rails
+  mocha
   multi_json
   oj
   omniauth (= 1.1.1)
index 223f5ca2168c5ab25d316ef97dd3eb6081fb1463..fbbf53e3c6ac3d1c247b556db29f2f23141c4285 100644 (file)
@@ -11,3 +11,55 @@ rescue
 end
 
 Server::Application.load_tasks
+
+namespace :test do
+  task(:run).clear
+  # Copied from the definition in Rails 3.2.
+  # This may need to be updated if we upgrade Rails.
+  task :run do
+    errors = %w(test:units test:functionals test:integration test:tasks).collect do |task|
+      begin
+        Rake::Task[task].invoke
+        nil
+      rescue => e
+        { :task => task, :exception => e }
+      end
+    end.compact
+
+    if errors.any?
+      puts errors.map { |e| "Errors running #{e[:task]}! #{e[:exception].inspect}" }.join("\n")
+      abort
+    end
+  end
+end
+
+namespace :db do
+  namespace :structure do
+    task :dump do
+      require 'tempfile'
+      origfnm = File.expand_path('../db/structure.sql', __FILE__)
+      tmpfnm = Tempfile.new 'structure.sql', File.expand_path('..', origfnm)
+      begin
+        tmpfile = File.new tmpfnm, 'w'
+        origfile = File.new origfnm
+        origfile.each_line do |line|
+          if /^SET lock_timeout = 0;/ =~ line
+            # Avoid edit wars between versions that do/don't write this line.
+            next
+          elsif /^COMMENT ON EXTENSION/ =~ line
+            # Avoid warning message when loading:
+            # "structure.sql:22: ERROR:  must be owner of extension plpgsql"
+            tmpfile.write "-- "
+          end
+          tmpfile.write line
+        end
+        origfile.close
+        tmpfile.close
+        File.rename tmpfnm, origfnm
+        tmpfnm = false
+      ensure
+        File.unlink tmpfnm if tmpfnm
+      end
+    end
+  end
+end
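
To make the dump filter above concrete, a small worked example with assumed sample lines from a structure.sql file:

    sample = ["SET lock_timeout = 0;\n",
              "COMMENT ON EXTENSION plpgsql IS 'PL/pgSQL procedural language';\n",
              "CREATE TABLE users ();\n"]
    out = ""
    sample.each do |line|
      next if /^SET lock_timeout = 0;/ =~ line        # dropped to avoid edit wars
      out << "-- " if /^COMMENT ON EXTENSION/ =~ line # commented out
      out << line
    end
    # out is now:
    #   -- COMMENT ON EXTENSION plpgsql IS 'PL/pgSQL procedural language';
    #   CREATE TABLE users ();
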
index 69c03bde9fc0a1b22ca7a39a3bb7a78d94dd471c..e91e3ce03ec6369addee5d63964ac10e198e251a 100644 (file)
@@ -191,9 +191,18 @@ class ApplicationController < ActionController::Base
            }.merge opts)
   end
 
+  def self.limit_index_columns_read
+    # This method returns a list of column names.
+    # If an index request reads any of these columns from the
+    # database, find_objects_for_index stops fetching objects once it
+    # has read max_index_database_read bytes of data from them.
+    []
+  end
+
   def find_objects_for_index
     @objects ||= model_class.readable_by(*@read_users)
     apply_where_limit_order_params
+    limit_database_read if (action_name == "index")
   end
 
   def apply_filters model_class=nil
@@ -268,10 +277,7 @@ class ApplicationController < ActionController::Base
         # Map attribute names in @select to real column names, resolve
         # those to fully-qualified SQL column names, and pass the
         # resulting string to the select method.
-        api_column_map = model_class.attributes_required_columns
-        columns_list = @select.
-          flat_map { |attr| api_column_map[attr] }.
-          uniq.
+        columns_list = model_class.columns_for_attributes(@select).
           map { |s| "#{ar_table_name}.#{ActiveRecord::Base.connection.quote_column_name s}" }
         @objects = @objects.select(columns_list.join(", "))
       end
@@ -289,6 +295,34 @@ class ApplicationController < ActionController::Base
     @objects = @objects.uniq(@distinct) if not @distinct.nil?
   end
 
+  def limit_database_read
+    limit_columns = self.class.limit_index_columns_read
+    limit_columns &= model_class.columns_for_attributes(@select) if @select
+    return if limit_columns.empty?
+    model_class.transaction do
+      limit_query = @objects.
+        except(:select).
+        select("(%s) as read_length" %
+               limit_columns.map { |s| "octet_length(#{s})" }.join(" + "))
+      new_limit = 0
+      read_total = 0
+      limit_query.each do |record|
+        new_limit += 1
+        read_total += record.read_length.to_i
+        if read_total >= Rails.configuration.max_index_database_read
+          new_limit -= 1 if new_limit > 1
+          break
+        elsif new_limit >= @limit
+          break
+        end
+      end
+      @limit = new_limit
+      @objects = @objects.limit(@limit)
+      # Force @objects to run its query inside this transaction.
+      @objects.each { |_| break }
+    end
+  end
+
   def resource_attrs
     return @attrs if @attrs
     @attrs = params[resource_name]
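
A worked example of the read_length expression built by limit_database_read above, assuming limit_index_columns_read returns ["manifest_text"] (as the collections controller in the next diff does):

    limit_columns = ["manifest_text"]
    select_expr = "(%s) as read_length" %
      limit_columns.map { |s| "octet_length(#{s})" }.join(" + ")
    select_expr  # => "(octet_length(manifest_text)) as read_length"
    # The pre-query scans rows, summing read_length, and lowers @limit to
    # the number of rows that fit under max_index_database_read.
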
index 956de8e8942826bdb1ad1473c9d5e1f59631a8e9..44733cdfb82ff1c21c4ca379a723110ebcaf5721 100644 (file)
@@ -1,6 +1,10 @@
 require "arvados/keep"
 
 class Arvados::V1::CollectionsController < ApplicationController
+  def self.limit_index_columns_read
+    ["manifest_text"]
+  end
+
   def create
     if resource_attrs[:uuid] and (loc = Keep::Locator.parse(resource_attrs[:uuid]))
       resource_attrs[:portable_data_hash] = loc.to_s
index ce8a05cba26888d649f6fc28cf7de8de55ee6407..f1ef2d824054f3a0dbe3bb338a966d3a00341b10 100644 (file)
@@ -33,20 +33,27 @@ class Arvados::V1::JobsController < ApplicationController
         @filters =
           [["repository", "=", resource_attrs[:repository]],
            ["script", "=", resource_attrs[:script]],
-           ["script_version", "in git",
-            params[:minimum_script_version] || resource_attrs[:script_version]],
            ["script_version", "not in git", params[:exclude_script_versions]],
           ].reject { |filter| filter.last.nil? or filter.last.empty? }
+        if !params[:minimum_script_version].blank?
+          @filters << ["script_version", "in git",
+                       params[:minimum_script_version]]
+        else
+          add_default_git_filter("script_version", resource_attrs[:repository],
+                                 resource_attrs[:script_version])
+        end
         if image_search = resource_attrs[:runtime_constraints].andand["docker_image"]
           if image_tag = resource_attrs[:runtime_constraints]["docker_image_tag"]
             image_search += ":#{image_tag}"
           end
-          @filters.append(["docker_image_locator", "in docker", image_search])
+          image_locator = Collection.
+            for_latest_docker_image(image_search).andand.portable_data_hash
         else
-          @filters.append(["docker_image_locator", "=", nil])
+          image_locator = nil
         end
+        @filters << ["docker_image_locator", "=", image_locator]
         if sdk_version = resource_attrs[:runtime_constraints].andand["arvados_sdk_version"]
-          @filters.append(["arvados_sdk_version", "in git", sdk_version])
+          add_default_git_filter("arvados_sdk_version", "arvados", sdk_version)
         end
         begin
           load_job_specific_filters
@@ -199,6 +206,16 @@ class Arvados::V1::JobsController < ApplicationController
 
   protected
 
+  def add_default_git_filter(attr_name, repo_name, refspec)
+    # Add a filter to @filters for `attr_name` = the latest commit available
+    # in `repo_name` at `refspec`.  No filter is added if the refspec
+    # can't be resolved.
+    commits = Commit.find_commit_range(repo_name, nil, refspec, nil)
+    if commit_hash = commits.first
+      @filters << [attr_name, "=", commit_hash]
+    end
+  end
+
   def load_job_specific_filters
     # Convert Job-specific @filters entries into general SQL filters.
     script_info = {"repository" => nil, "script" => nil}
@@ -254,18 +271,17 @@ class Arvados::V1::JobsController < ApplicationController
       else
         raise ArgumentError.new("unknown attribute for git filter: #{attr}")
       end
-      version_range = Commit.find_commit_range(current_user,
-                                               filter["repository"],
-                                               filter["min_version"],
-                                               filter["max_version"],
-                                               filter["exclude_versions"])
-      if version_range.nil?
+      revisions = Commit.find_commit_range(filter["repository"],
+                                           filter["min_version"],
+                                           filter["max_version"],
+                                           filter["exclude_versions"])
+      if revisions.empty?
         raise ArgumentError.
           new("error searching #{filter['repository']} from " +
               "'#{filter['min_version']}' to '#{filter['max_version']}', " +
               "excluding #{filter['exclude_versions']}")
       end
-      @filters.append([attr, "in", version_range])
+      @filters.append([attr, "in", revisions])
     end
   end
 
index b5123d97e3017af8b9c5fbcbf4e8d98a81a91436..4bf9a6a0945462e2bf74596d620ec21575541844 100644 (file)
@@ -2,77 +2,120 @@ class Arvados::V1::RepositoriesController < ApplicationController
   skip_before_filter :find_object_by_uuid, :only => :get_all_permissions
   skip_before_filter :render_404_if_no_object, :only => :get_all_permissions
   before_filter :admin_required, :only => :get_all_permissions
+
   def get_all_permissions
-    @users = {}
-    User.includes(:authorized_keys).all.each do |u|
-      @users[u.uuid] = u
+    # users is a map of {user_uuid => User object}
+    users = {}
+    # user_aks is a map of {user_uuid => array of public keys}
+    user_aks = {}
+    # admins is an array of user_uuids
+    admins = []
+    User.eager_load(:authorized_keys).find_each do |u|
+      next unless u.is_active or u.uuid == anonymous_user_uuid
+      users[u.uuid] = u
+      user_aks[u.uuid] = u.authorized_keys.collect do |ak|
+        {
+          public_key: ak.public_key,
+          authorized_key_uuid: ak.uuid
+        }
+      end
+      admins << u.uuid if u.is_admin
     end
-    admins = @users.select { |k,v| v.is_admin }
-    @user_aks = {}
     @repo_info = {}
-    @repos = Repository.includes(:permissions).all
-    @repos.each do |repo|
-      gitolite_permissions = ''
-      perms = []
+    Repository.eager_load(:permissions).find_each do |repo|
+      @repo_info[repo.uuid] = {
+        uuid: repo.uuid,
+        name: repo.name,
+        push_url: repo.push_url,
+        fetch_url: repo.fetch_url,
+        user_permissions: {},
+      }
+      # evidence is an array of {name: 'can_xxx', user_uuid: 'x-y-z'},
+      # one entry for each piece of evidence we find in the permission
+      # database that establishes that a user can access this
+      # repository. Multiple entries can be added for a given user,
+      # possibly with different access levels; these will be compacted
+      # below.
+      evidence = []
       repo.permissions.each do |perm|
         if ArvadosModel::resource_class_for_uuid(perm.tail_uuid) == Group
-          @users.each do |user_uuid, user|
-            user.group_permissions.each do |group_uuid, perm_mask|
-              if perm_mask[:manage]
-                perms << {name: 'can_manage', user_uuid: user_uuid}
-              elsif perm_mask[:write]
-                perms << {name: 'can_write', user_uuid: user_uuid}
-              elsif perm_mask[:read]
-                perms << {name: 'can_read', user_uuid: user_uuid}
-              end
+          # A group has permission. Each user who has access to this
+          # group also has access to the repository. Access level is
+          # min(group-to-repo permission, user-to-group permission).
+          users.each do |user_uuid, user|
+            perm_mask = user.group_permissions[perm.tail_uuid]
+            if not perm_mask
+              next
+            elsif perm_mask[:manage] and perm.name == 'can_manage'
+              evidence << {name: 'can_manage', user_uuid: user_uuid}
+            elsif perm_mask[:write] and ['can_manage', 'can_write'].index perm.name
+              evidence << {name: 'can_write', user_uuid: user_uuid}
+            elsif perm_mask[:read]
+              evidence << {name: 'can_read', user_uuid: user_uuid}
             end
           end
-        else
-          perms << {name: perm.name, user_uuid: perm.tail_uuid}
+        elsif users[perm.tail_uuid]
+          # A user has permission; the user exists; and either the
+          # user is active, or it's the special case of the anonymous
+          # user which is never "active" but is allowed to read
+          # content from public repositories.
+          evidence << {name: perm.name, user_uuid: perm.tail_uuid}
         end
       end
-      # Owner of the repository, and all admins, can RW
-      ([repo.owner_uuid] + admins.keys).each do |user_uuid|
-        perms << {name: 'can_write', user_uuid: user_uuid}
+      # Owner of the repository, and all admins, can do everything.
+      ([repo.owner_uuid] | admins).each do |user_uuid|
+        # Except: no permissions for inactive users, even if they own
+        # repositories.
+        next unless users[user_uuid]
+        evidence << {name: 'can_manage', user_uuid: user_uuid}
       end
-      perms.each do |perm|
+      # Distill all the evidence about permissions on this repository
+      # into one hash per user, of the form {'can_xxx' => true, ...}.
+      # The hash is nil for a user who has no permissions at all on
+      # this particular repository.
+      evidence.each do |perm|
         user_uuid = perm[:user_uuid]
-        @user_aks[user_uuid] = @users[user_uuid].andand.authorized_keys.andand.
-          collect do |ak|
-          {
-            public_key: ak.public_key,
-            authorized_key_uuid: ak.uuid
-          }
-        end || []
-        if @user_aks[user_uuid].any?
-          @repo_info[repo.uuid] ||= {
-            uuid: repo.uuid,
-            name: repo.name,
-            push_url: repo.push_url,
-            fetch_url: repo.fetch_url,
-            user_permissions: {}
-          }
-          ri = (@repo_info[repo.uuid][:user_permissions][user_uuid] ||= {})
-          ri[perm[:name]] = true
-        end
+        user_perms = (@repo_info[repo.uuid][:user_permissions][user_uuid] ||= {})
+        user_perms[perm[:name]] = true
       end
     end
-    @repo_info.values.each do |repo_users|
-      repo_users[:user_permissions].each do |user_uuid,perms|
-        if perms['can_manage']
-          perms[:gitolite_permissions] = 'RW'
-          perms['can_write'] = true
-          perms['can_read'] = true
-        elsif perms['can_write']
-          perms[:gitolite_permissions] = 'RW'
-          perms['can_read'] = true
-        elsif perms['can_read']
-          perms[:gitolite_permissions] = 'R'
+    # Revisit each {'can_xxx' => true, ...} hash for some final
+    # cleanup to make life easier for the requestor.
+    #
+    # Add a 'gitolite_permissions' key alongside the 'can_xxx' keys,
+    # for the convenience of the gitolite config file generator.
+    #
+    # Add all lesser permissions when a greater permission is
+    # present. If the requestor only wants to know who can write, it
+    # only has to test for 'can_write' in the response.
+    @repo_info.values.each do |repo|
+      repo[:user_permissions].each do |user_uuid, user_perms|
+        if user_perms['can_manage']
+          user_perms['gitolite_permissions'] = 'RW'
+          user_perms['can_write'] = true
+          user_perms['can_read'] = true
+        elsif user_perms['can_write']
+          user_perms['gitolite_permissions'] = 'RW'
+          user_perms['can_read'] = true
+        elsif user_perms['can_read']
+          user_perms['gitolite_permissions'] = 'R'
         end
       end
     end
+    # The response looks like
+    #   {"kind":"...",
+    #    "repositories":[r1,r2,r3,...],
+    #    "user_keys":usermap}
+    # where each of r1,r2,r3 looks like
+    #   {"uuid":"repo-uuid-1",
+    #    "name":"username/reponame",
+    #    "push_url":"...",
+    #    "user_permissions":{"user-uuid-a":{"can_read":true,"gitolite_permissions":"R"}}}
+    # and usermap looks like
+    #   {"user-uuid-a":[{"public_key":"ssh-rsa g...","authorized_key_uuid":"ak-uuid-g"},...],
+    #    "user-uuid-b":[{"public_key":"ssh-rsa h...","authorized_key_uuid":"ak-uuid-h"},...],...}
     send_json(kind: 'arvados#RepositoryPermissionSnapshot',
               repositories: @repo_info.values,
-              user_keys: @user_aks)
+              user_keys: user_aks)
   end
 end
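
To make the evidence-distillation step concrete, a small worked example; the user UUIDs are placeholders:

    evidence = [{name: 'can_read',  user_uuid: 'zzzzz-tpzed-aaa'},
                {name: 'can_write', user_uuid: 'zzzzz-tpzed-aaa'},
                {name: 'can_read',  user_uuid: 'zzzzz-tpzed-bbb'}]
    user_permissions = {}
    evidence.each do |perm|
      (user_permissions[perm[:user_uuid]] ||= {})[perm[:name]] = true
    end
    # After the final cleanup pass above, user aaa ends up with
    #   {'can_write' => true, 'can_read' => true, 'gitolite_permissions' => 'RW'}
    # and user bbb with {'can_read' => true, 'gitolite_permissions' => 'R'}.
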
index dcc9c639793eebe17f39a21f05990c81512431ce..62d5e59c8d142ce5116da263c9314def02b670d1 100644 (file)
@@ -35,6 +35,7 @@ class Arvados::V1::SchemaController < ApplicationController
         servicePath: "arvados/v1/",
         batchPath: "batch",
         defaultTrashLifetime: Rails.application.config.default_trash_lifetime,
+        blobSignatureTtl: Rails.application.config.blob_signature_ttl,
         maxRequestSize: Rails.application.config.max_request_size,
         parameters: {
           alt: {
index 131ee5236bc08e26afb096912505594db0755f67..03efed999fcb9791df63d4c6bc8475003f55b4c7 100644 (file)
@@ -96,12 +96,29 @@ class Arvados::V1::UsersController < ApplicationController
       end
     end
 
+    # It's not always possible for the client to know the user's
+    # username when submitting this request: the username might have
+    # been assigned automatically in create!() above. If the client
+    # provided a plain repository name, prefix it with the username
+    # now that we know what it is.
+    if params[:repo_name].nil?
+      full_repo_name = nil
+    elsif @object.username.nil?
+      raise ArgumentError.
+        new("cannot setup a repository because user has no username")
+    elsif object_found and
+        params[:repo_name].start_with?("#{@object.username}/")
+      full_repo_name = params[:repo_name]
+    else
+      full_repo_name = "#{@object.username}/#{params[:repo_name]}"
+    end
+
     if object_found
-      @response = @object.setup_repo_vm_links params[:repo_name],
+      @response = @object.setup_repo_vm_links full_repo_name,
                     params[:vm_uuid], params[:openid_prefix]
     else
       @response = User.setup @object, params[:openid_prefix],
-                    params[:repo_name], params[:vm_uuid]
+                    full_repo_name, params[:vm_uuid]
     end
 
     # setup succeeded. send email to user
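
A quick illustration of the prefixing rule; the username and repository names are hypothetical:

    username = "jsmith"
    ["foo", "jsmith/foo"].map do |repo_name|
      repo_name.start_with?("#{username}/") ? repo_name : "#{username}/#{repo_name}"
    end
    # => ["jsmith/foo", "jsmith/foo"]  (a nil repo_name stays nil)
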
index 35e5e4257afa5745e1e5a36337a284da2a141ccd..e6474aa4e0328a6759039921b9962d627b0b374d 100644 (file)
@@ -4,37 +4,49 @@ class Arvados::V1::VirtualMachinesController < ApplicationController
   before_filter(:admin_required,
                 :only => [:logins, :get_all_logins])
 
+  # Get all login permissions (user uuid, login account, SSH key) for a
+  # single VM
   def logins
-    get_all_logins
+    render_logins_for VirtualMachine.where(uuid: @object.uuid)
   end
 
+  # Get all login permissions for all VMs
   def get_all_logins
+    render_logins_for VirtualMachine
+  end
+
+  protected
+
+  def render_logins_for vm_query
+    @response = []
+    @vms = vm_query.eager_load :login_permissions
     @users = {}
-    User.includes(:authorized_keys).all.each do |u|
+    User.eager_load(:authorized_keys).
+      where('users.uuid in (?)',
+            @vms.map { |vm| vm.login_permissions.map &:tail_uuid }.flatten.uniq).
+      each do |u|
       @users[u.uuid] = u
     end
-    @response = []
-    @vms = VirtualMachine.includes(:login_permissions)
-    if @object
-      @vms = @vms.where('uuid=?', @object.uuid)
-    else
-      @vms = @vms.all
-    end
     @vms.each do |vm|
       vm.login_permissions.each do |perm|
         user_uuid = perm.tail_uuid
-        @users[user_uuid].andand.authorized_keys.andand.each do |ak|
-          username = perm.properties.andand['username']
-          if username
-            @response << {
-              username: username,
-              hostname: vm.hostname,
-              public_key: ak.public_key,
-              user_uuid: user_uuid,
-              virtual_machine_uuid: vm.uuid,
-              authorized_key_uuid: ak.uuid
-            }
-          end
+        next if not @users[user_uuid]
+        next if perm.properties['username'].blank?
+        aks = @users[user_uuid].authorized_keys
+        if aks.empty?
+          # We'll emit one entry, with no public key.
+          aks = [nil]
+        end
+        aks.each do |ak|
+          @response << {
+            username: perm.properties['username'],
+            hostname: vm.hostname,
+            groups: (perm.properties['groups'].to_a rescue []),
+            public_key: ak ? ak.public_key : nil,
+            user_uuid: user_uuid,
+            virtual_machine_uuid: vm.uuid,
+            authorized_key_uuid: ak ? ak.uuid : nil,
+          }
         end
       end
     end
index 256a67bcbb55aa426e405312fd3908e9dc1177dd..795b114bf91930447fd777686015ef97ce001a49 100644 (file)
@@ -15,7 +15,7 @@ class UserSessionsController < ApplicationController
     unless identity_url_ok
       # Whoa. This should never happen.
       logger.error "UserSessionsController.create: omniauth object missing/invalid"
-      logger.error "omniauth.pretty_inspect():\n\n#{omniauth.pretty_inspect()}"
+      logger.error "omniauth: "+omniauth.pretty_inspect
 
       return redirect_to login_failure_url
     end
@@ -93,7 +93,7 @@ class UserSessionsController < ApplicationController
 
     flash[:notice] = 'You have logged off'
     return_to = params[:return_to] || root_url
-    redirect_to "#{CUSTOM_PROVIDER_URL}/users/sign_out?redirect_uri=#{CGI.escape return_to}"
+    redirect_to "#{Rails.configuration.sso_provider_url}/users/sign_out?redirect_uri=#{CGI.escape return_to}"
   end
 
   # login - Just bounce to /auth/joshid. The only purpose of this function is
index 055fe3ad04a27e411d5e0a1b129ff2b3f8ae5887..7c96d718cc3246c78d536f0580c8f83486584186 100644 (file)
@@ -5,7 +5,7 @@ class UserNotifier < ActionMailer::Base
 
   def account_is_setup(user)
     @user = user
-    mail(to: user.email, subject: 'Welcome to Curoverse')
+    mail(to: user.email, subject: 'Welcome to Curoverse - shell account enabled')
   end
 
 end
index 1fe58088483fad98e34531391bd2b21a5bf91deb..35dd1a94c9d983b343fc6394370f03ca795ca896 100644 (file)
@@ -23,6 +23,7 @@ class ArvadosModel < ActiveRecord::Base
   after_destroy :log_destroy
   after_find :convert_serialized_symbols_to_strings
   before_validation :normalize_collection_uuids
+  before_validation :set_default_owner
   validate :ensure_serialized_attribute_type
   validate :ensure_valid_uuids
 
@@ -103,6 +104,13 @@ class ArvadosModel < ActiveRecord::Base
     api_column_map
   end
 
+  def self.columns_for_attributes(select_attributes)
+    # Given an array of attribute names to select, return an array of column
+    # names that must be fetched from the database to satisfy the request.
+    api_column_map = attributes_required_columns
+    select_attributes.flat_map { |attr| api_column_map[attr] }.uniq
+  end
+
   def self.default_orders
     ["#{table_name}.modified_at desc", "#{table_name}.uuid"]
   end
@@ -219,21 +227,20 @@ class ArvadosModel < ActiveRecord::Base
 
   def self.full_text_searchable_columns
     self.columns.select do |col|
-      if col.type == :string or col.type == :text
-        true
-      end
+      col.type == :string or col.type == :text
     end.map(&:name)
   end
 
   def self.full_text_tsvector
-    tsvector_str = "to_tsvector('english', "
-    first = true
-    self.full_text_searchable_columns.each do |column|
-      tsvector_str += " || ' ' || " if not first
-      tsvector_str += "coalesce(#{column},'')"
-      first = false
+    parts = full_text_searchable_columns.collect do |column|
+      "coalesce(#{column},'')"
     end
-    tsvector_str += ")"
+    # We prepend a space to the tsvector() argument here. Otherwise,
+    # it might start with a column that has its own (non-full-text)
+    # index, in which case Postgres uses that column index instead of
+    # the tsvector index, making full-text queries just as slow as
+    # having no index at all.
+    "to_tsvector('english', ' ' || #{parts.join(" || ' ' || ")})"
   end
 
   protected
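
A worked example of the expression full_text_tsvector generates, assuming a model whose string/text columns are name and description:

    parts = ["name", "description"].collect { |column| "coalesce(#{column},'')" }
    "to_tsvector('english', ' ' || #{parts.join(" || ' ' || ")})"
    # => "to_tsvector('english', ' ' || coalesce(name,'') || ' ' || coalesce(description,''))"
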
@@ -270,12 +277,14 @@ class ArvadosModel < ActiveRecord::Base
     true
   end
 
-  def ensure_owner_uuid_is_permitted
-    raise PermissionDeniedError if !current_user
-
-    if new_record? and respond_to? :owner_uuid=
+  def set_default_owner
+    if new_record? and current_user and respond_to? :owner_uuid=
       self.owner_uuid ||= current_user.uuid
     end
+  end
+
+  def ensure_owner_uuid_is_permitted
+    raise PermissionDeniedError if !current_user
 
     if self.owner_uuid.nil?
       errors.add :owner_uuid, "cannot be nil"
@@ -308,8 +317,13 @@ class ArvadosModel < ActiveRecord::Base
     # Verify "write" permission on new owner
     # default fail unless one of:
     # current_user is this object
-    # current user can_write new owner
-    unless current_user == self or current_user.can? write: owner_uuid
+    # current user can_write new owner, or this object if owner unchanged
+    if new_record? or owner_uuid_changed? or is_a?(ApiClientAuthorization)
+      write_target = owner_uuid
+    else
+      write_target = uuid
+    end
+    unless current_user == self or current_user.can? write: write_target
       logger.warn "User #{current_user.uuid} tried to modify #{self.class.to_s} #{uuid} but does not have permission to write new owner_uuid #{owner_uuid}"
       errors.add :owner_uuid, "cannot be changed without write permission on new owner"
       raise PermissionDeniedError
index b156a1d0f697440ae0912ff352049bdf0de28c2a..452cd6967bfa7c40d22f8746cc69c2ea10c7fc89 100644 (file)
@@ -33,14 +33,14 @@ class AuthorizedKey < ArvadosModel
 
   def public_key_must_be_unique
     if self.public_key
-      #key = /^ssh-(rsa|dss) [A-Za-z0-9+\/=\+]+\b/.match(self.public_key)
       valid_key = SSHKey.valid_ssh_public_key? self.public_key
 
       if not valid_key
         errors.add(:public_key, "does not appear to be a valid ssh-rsa or dsa public key")
       else
         # Valid if no other rows have this public key
-        if self.class.where('public_key like ?', "%#{self.public_key}%").any?
+        if self.class.where('uuid != ? and public_key like ?',
+                            uuid || '', "%#{self.public_key}%").any?
           errors.add(:public_key, "already exists in the database, use a different key.")
           return false
         end
index 799279d0400f31c2cf54feb503a1f48401e96321..34600d7a25a8c716bd9d1fd6ec49cea052dc0c58 100644 (file)
@@ -28,8 +28,8 @@ class Blob
   # Blob.sign_locator: return a signed and timestamped blob locator.
   #
   # The 'opts' argument should include:
-  #   [required] :key       - the Arvados server-side blobstore key
-  #   [required] :api_token - user's API token
+  #   [required] :api_token - API token (signatures only work for this token)
+  #   [optional] :key       - the Arvados server-side blobstore key
   #   [optional] :ttl       - number of seconds before signature should expire
   #   [optional] :expire    - unix timestamp when signature should expire
   #
@@ -44,14 +44,16 @@ class Blob
       end
       timestamp = opts[:expire]
     else
-      timestamp = db_current_time.to_i + (opts[:ttl] || 1209600)
+      timestamp = db_current_time.to_i +
+        (opts[:ttl] || Rails.configuration.blob_signature_ttl)
     end
     timestamp_hex = timestamp.to_s(16)
     # => "53163cb4"
 
     # Generate a signature.
     signature =
-      generate_signature opts[:key], blob_hash, opts[:api_token], timestamp_hex
+      generate_signature((opts[:key] or Rails.configuration.blob_signing_key),
+                         blob_hash, opts[:api_token], timestamp_hex)
 
     blob_locator + '+A' + signature + '@' + timestamp_hex
   end
@@ -88,15 +90,16 @@ class Blob
     if !timestamp
       raise Blob::InvalidSignatureError.new 'No signature provided.'
     end
-    if !timestamp.match /^[\da-f]+$/
+    unless timestamp =~ /^[\da-f]+$/
       raise Blob::InvalidSignatureError.new 'Timestamp is not a base16 number.'
     end
-    if timestamp.to_i(16) < db_current_time.to_i
+    if timestamp.to_i(16) < (opts[:now] or db_current_time.to_i)
       raise Blob::InvalidSignatureError.new 'Signature expiry time has passed.'
     end
 
     my_signature =
-      generate_signature opts[:key], blob_hash, opts[:api_token], timestamp
+      generate_signature((opts[:key] or Rails.configuration.blob_signing_key),
+                         blob_hash, opts[:api_token], timestamp)
 
     if my_signature != given_signature
       raise Blob::InvalidSignatureError.new 'Signature is invalid.'
index 89ad874cd7d211aae90f841927a620150af1ab9b..f1e7b4f1e164c8525118a5d62c5f06de3d5e54e1 100644 (file)
@@ -1,18 +1,19 @@
 require 'arvados/keep'
 
 class Collection < ArvadosModel
+  extend DbCurrentTime
   include HasUuid
   include KindAndEtag
   include CommonApiTemplate
 
   serialize :properties, Hash
 
+  before_validation :default_empty_manifest
   before_validation :check_encoding
+  before_validation :check_manifest_validity
   before_validation :check_signatures
-  before_validation :strip_manifest_text
-  before_validation :set_portable_data_hash
-  before_validation :maybe_clear_replication_confirmed
-  validate :ensure_hash_matches_manifest_text
+  before_validation :strip_signatures_and_update_replication_confirmed
+  validate :ensure_pdh_matches_manifest_text
   before_save :set_file_names
 
   # Query only undeleted collections by default.
@@ -40,6 +41,7 @@ class Collection < ArvadosModel
                 )
   end
 
+  FILE_TOKEN = /^[[:digit:]]+:[[:digit:]]+:/
   def check_signatures
     return false if self.manifest_text.nil?
 
@@ -50,7 +52,7 @@ class Collection < ArvadosModel
     # subsequent passes without checking any signatures. This is
     # important because the signatures have probably been stripped off
     # by the time we get to a second validation pass!
-    return true if @signatures_checked and @signatures_checked == compute_pdh
+    return true if @signatures_checked and @signatures_checked == computed_pdh
 
     if self.manifest_text_changed?
       # Check permissions on the collection manifest.
@@ -58,13 +60,14 @@ class Collection < ArvadosModel
       # which will return 403 Permission denied to the client.
       api_token = current_api_client_authorization.andand.api_token
       signing_opts = {
-        key: Rails.configuration.blob_signing_key,
         api_token: api_token,
-        ttl: Rails.configuration.blob_signing_ttl,
+        now: db_current_time.to_i,
       }
-      self.manifest_text.lines.each do |entry|
-        entry.split[1..-1].each do |tok|
-          if /^[[:digit:]]+:[[:digit:]]+:/.match tok
+      self.manifest_text.each_line do |entry|
+        entry.split.each do |tok|
+          if tok == '.' or tok.starts_with? './'
+            # Stream name token.
+          elsif tok =~ FILE_TOKEN
             # This is a filename token, not a blob locator. Note that we
             # keep checking tokens after this, even though manifest
             # format dictates that all subsequent tokens will also be
@@ -87,52 +90,59 @@ class Collection < ArvadosModel
         end
       end
     end
-    @signatures_checked = compute_pdh
+    @signatures_checked = computed_pdh
   end
 
-  def strip_manifest_text
+  def strip_signatures_and_update_replication_confirmed
     if self.manifest_text_changed?
-      # Remove any permission signatures from the manifest.
-      self.class.munge_manifest_locators!(self[:manifest_text]) do |loc|
-        loc.without_signature.to_s
+      in_old_manifest = {}
+      if not self.replication_confirmed.nil?
+        self.class.each_manifest_locator(manifest_text_was) do |match|
+          in_old_manifest[match[1]] = true
+        end
       end
-    end
-    true
-  end
 
-  def set_portable_data_hash
-    if (portable_data_hash.nil? or
-        portable_data_hash == "" or
-        (manifest_text_changed? and !portable_data_hash_changed?))
-      @need_pdh_validation = false
-      self.portable_data_hash = compute_pdh
-    elsif portable_data_hash_changed?
-      @need_pdh_validation = true
-      begin
-        loc = Keep::Locator.parse!(self.portable_data_hash)
-        loc.strip_hints!
-        if loc.size
-          self.portable_data_hash = loc.to_s
-        else
-          self.portable_data_hash = "#{loc.hash}+#{portable_manifest_text.bytesize}"
+      stripped_manifest = self.class.munge_manifest_locators(manifest_text) do |match|
+        if not self.replication_confirmed.nil? and not in_old_manifest[match[1]]
+          # If the new manifest_text contains locators whose hashes
+          # weren't in the old manifest_text, storage replication is no
+          # longer confirmed.
+          self.replication_confirmed_at = nil
+          self.replication_confirmed = nil
         end
-      rescue ArgumentError => e
-        errors.add(:portable_data_hash, "#{e}")
-        return false
+
+        # Return the locator with all permission signatures removed,
+        # but otherwise intact.
+        match[0].gsub(/\+A[^+]*/, '')
+      end
+
+      if @computed_pdh_for_manifest_text == manifest_text
+        # If the cached PDH was valid before stripping, it is still
+        # valid after stripping.
+        @computed_pdh_for_manifest_text = stripped_manifest.dup
       end
+
+      self[:manifest_text] = stripped_manifest
     end
     true
   end
 
-  def ensure_hash_matches_manifest_text
-    return true unless manifest_text_changed? or portable_data_hash_changed?
-    # No need verify it if :set_portable_data_hash just computed it!
-    return true if not @need_pdh_validation
-    expect_pdh = compute_pdh
-    if expect_pdh != portable_data_hash
+  def ensure_pdh_matches_manifest_text
+    if not manifest_text_changed? and not portable_data_hash_changed?
+      true
+    elsif portable_data_hash.nil? or not portable_data_hash_changed?
+      self.portable_data_hash = computed_pdh
+    elsif portable_data_hash !~ Keep::Locator::LOCATOR_REGEXP
+      errors.add(:portable_data_hash, "is not a valid locator")
+      false
+    elsif portable_data_hash[0..31] != computed_pdh[0..31]
       errors.add(:portable_data_hash,
-                 "does not match computed hash #{expect_pdh}")
-      return false
+                 "does not match computed hash #{computed_pdh}")
+      false
+    else
+      # Ignore the client-provided size part: always store
+      # computed_pdh in the database.
+      self.portable_data_hash = computed_pdh
     end
   end
 
@@ -162,6 +172,10 @@ class Collection < ArvadosModel
     names[0,2**12]
   end
 
+  def default_empty_manifest
+    self.manifest_text ||= ''
+  end
+
   def check_encoding
     if manifest_text.encoding.name == 'UTF-8' and manifest_text.valid_encoding?
       true
@@ -184,6 +198,16 @@ class Collection < ArvadosModel
     end
   end
 
+  def check_manifest_validity
+    begin
+      Keep::Manifest.validate! manifest_text
+      true
+    rescue ArgumentError => e
+      errors.add :manifest_text, e.message
+      false
+    end
+  end
+
   def signed_manifest_text
     if has_attribute? :manifest_text
       token = current_api_client_authorization.andand.api_token
@@ -193,34 +217,50 @@ class Collection < ArvadosModel
 
   def self.sign_manifest manifest, token
     signing_opts = {
-      key: Rails.configuration.blob_signing_key,
       api_token: token,
-      ttl: Rails.configuration.blob_signing_ttl,
+      expire: db_current_time.to_i + Rails.configuration.blob_signature_ttl,
     }
-    m = manifest.dup
-    munge_manifest_locators!(m) do |loc|
-      Blob.sign_locator(loc.to_s, signing_opts)
+    m = munge_manifest_locators(manifest) do |match|
+      Blob.sign_locator(match[0], signing_opts)
     end
     return m
   end
 
-  def self.munge_manifest_locators! manifest
-    # Given a manifest text and a block, yield each locator,
-    # and replace it with whatever the block returns.
-    manifest.andand.gsub!(/ [[:xdigit:]]{32}(\+\S+)?/) do |word|
-      if loc = Keep::Locator.parse(word.strip)
-        " " + yield(loc)
-      else
-        " " + word
+  def self.munge_manifest_locators manifest
+    # Given a manifest text and a block, yield the regexp MatchData
+    # for each locator. Return a new manifest in which each locator
+    # has been replaced by the block's return value.
+    return nil if !manifest
+    return '' if manifest == ''
+
+    new_lines = []
+    manifest.each_line do |line|
+      line.rstrip!
+      new_words = []
+      line.split(' ').each do |word|
+        if new_words.empty?
+          new_words << word
+        elsif match = Keep::Locator::LOCATOR_REGEXP.match(word)
+          new_words << yield(match)
+        else
+          new_words << word
+        end
       end
+      new_lines << new_words.join(' ')
     end
+    new_lines.join("\n") + "\n"
   end
 
   def self.each_manifest_locator manifest
-    # Given a manifest text and a block, yield each locator.
-    manifest.andand.scan(/ ([[:xdigit:]]{32}(\+\S+)?)/) do |word, _|
-      if loc = Keep::Locator.parse(word)
-        yield loc
+    # Given a manifest text and a block, yield the regexp match object
+    # for each locator.
+    manifest.each_line do |line|
+      # line will have a trailing newline, but the last token is never
+      # a locator, so it's harmless here.
+      line.split(' ').each do |word|
+        if match = Keep::Locator::LOCATOR_REGEXP.match(word)
+          yield(match)
+        end
       end
     end
   end
@@ -308,15 +348,13 @@ class Collection < ArvadosModel
 
   protected
   def portable_manifest_text
-    portable_manifest = self[:manifest_text].dup
-    self.class.munge_manifest_locators!(portable_manifest) do |loc|
-      if loc.size
-        loc.hash + '+' + loc.size.to_s
+    self.class.munge_manifest_locators(manifest_text) do |match|
+      if match[2] # size
+        match[1] + match[2]
       else
-        loc.hash
+        match[1]
       end
     end
-    portable_manifest
   end
 
   def compute_pdh
@@ -326,23 +364,13 @@ class Collection < ArvadosModel
      portable_manifest.bytesize.to_s)
   end
 
-  def maybe_clear_replication_confirmed
-    if manifest_text_changed?
-      # If the new manifest_text contains locators whose hashes
-      # weren't in the old manifest_text, storage replication is no
-      # longer confirmed.
-      in_old_manifest = {}
-      self.class.each_manifest_locator(manifest_text_was) do |loc|
-        in_old_manifest[loc.hash] = true
-      end
-      self.class.each_manifest_locator(manifest_text) do |loc|
-        if not in_old_manifest[loc.hash]
-          self.replication_confirmed_at = nil
-          self.replication_confirmed = nil
-          break
-        end
-      end
+  def computed_pdh
+    if @computed_pdh_for_manifest_text == manifest_text
+      return @computed_pdh
     end
+    @computed_pdh = compute_pdh
+    @computed_pdh_for_manifest_text = manifest_text.dup
+    @computed_pdh
   end
 
   def ensure_permission_to_save
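
A small worked example of the signature-stripping use of munge_manifest_locators above; the signature hint is made up but follows the +A<hex>@<hex> shape:

    manifest = ". d41d8cd98f00b204e9800998ecf8427e+0" \
               "+A1234567890abcdef1234567890abcdef12345678@53163cb4 0:0:abc.txt\n"
    Collection.munge_manifest_locators(manifest) do |match|
      match[0].gsub(/\+A[^+]*/, '')  # drop permission signatures, keep the rest
    end
    # => ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:abc.txt\n"
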
index 0f62737cea15577879134bff6a128878c956612b..f74e2fedc7396335e6ff537bc2e882aa5da3e177 100644 (file)
@@ -1,5 +1,11 @@
 class Commit < ActiveRecord::Base
-  require 'shellwords'
+  extend CurrentApiClient
+
+  class GitError < StandardError
+    def http_status
+      422
+    end
+  end
 
   def self.git_check_ref_format(e)
     if !e or e.empty? or e[0] == '-' or e[0] == '$'
@@ -11,146 +17,213 @@ class Commit < ActiveRecord::Base
     end
   end
 
-  def self.find_commit_range(current_user, repository, minimum, maximum, exclude)
+  # Return an array of commits (each a 40-char sha1) satisfying the
+  # given criteria.
+  #
+  # Return [] if the revisions given in minimum/maximum are invalid or
+  # don't exist in the given repository.
+  #
+  # Raise ArgumentError if the given repository is invalid, does not
+  # exist, or cannot be read for any reason. (Any transient error that
+  # prevents commit ranges from resolving must raise rather than
+  # returning an empty array.)
+  #
+  # repository can be the name of a locally hosted repository or a git
+  # URL (see git-fetch(1)). Currently http, https, and git schemes are
+  # supported.
+  def self.find_commit_range repository, minimum, maximum, exclude
     if minimum and minimum.empty?
       minimum = nil
     end
 
     if minimum and !git_check_ref_format(minimum)
       logger.warn "find_commit_range called with invalid minimum revision: '#{minimum}'"
-      return nil
+      return []
     end
 
     if maximum and !git_check_ref_format(maximum)
       logger.warn "find_commit_range called with invalid maximum revision: '#{maximum}'"
-      return nil
+      return []
     end
 
     if !maximum
       maximum = "HEAD"
     end
 
-    # Get list of actual repository directories under management
-    on_disk_repos = repositories
+    gitdir, is_remote = git_dir_for repository
+    fetch_remote_repository gitdir, repository if is_remote
+    ENV['GIT_DIR'] = gitdir
 
-    # Get list of repository objects readable by user
-    readable = Repository.readable_by(current_user)
+    commits = []
 
-    # filter repository objects on requested repository name
-    if repository
-      readable = readable.where(name: repository)
+    # Get the commit hash for the upper bound
+    max_hash = nil
+    git_max_hash_cmd = "git rev-list --max-count=1 #{maximum.shellescape} --"
+    IO.foreach("|#{git_max_hash_cmd}") do |line|
+      max_hash = line.strip
     end
 
-    commits = []
-    readable.each do |r|
-      if on_disk_repos[r.name]
-        ENV['GIT_DIR'] = on_disk_repos[r.name][:git_dir]
-
-        # We've filtered for invalid characters, so we can pass the contents of
-        # minimum and maximum safely on the command line
+    # If not found, nothing else to do
+    if !max_hash
+      logger.warn "no refs found looking for max_hash: `GIT_DIR=#{gitdir} #{git_max_hash_cmd}` returned no output"
+      return []
+    end
 
-        # Get the commit hash for the upper bound
-        max_hash = nil
-        IO.foreach("|git rev-list --max-count=1 #{maximum.shellescape} --") do |line|
-          max_hash = line.strip
-        end
+    # If string is invalid, nothing else to do
+    if !git_check_ref_format(max_hash)
+      logger.warn "ref returned by `GIT_DIR=#{gitdir} #{git_max_hash_cmd}` was invalid for max_hash: #{max_hash}"
+      return []
+    end
 
-        # If not found or string is invalid, nothing else to do
-        next if !max_hash or !git_check_ref_format(max_hash)
-
-        resolved_exclude = nil
-        if exclude
-          resolved_exclude = []
-          exclude.each do |e|
-            if git_check_ref_format(e)
-              IO.foreach("|git rev-list --max-count=1 #{e.shellescape} --") do |line|
-                resolved_exclude.push(line.strip)
-              end
-            else
-              logger.warn "find_commit_range called with invalid exclude invalid characters: '#{exclude}'"
-              return nil
-            end
+    resolved_exclude = nil
+    if exclude
+      resolved_exclude = []
+      exclude.each do |e|
+        if git_check_ref_format(e)
+          IO.foreach("|git rev-list --max-count=1 #{e.shellescape} --") do |line|
+            resolved_exclude.push(line.strip)
           end
+        else
+          logger.warn "find_commit_range called with invalid exclude invalid characters: '#{exclude}'"
+          return []
         end
+      end
+    end
 
-        if minimum
-          # Get the commit hash for the lower bound
-          min_hash = nil
-          IO.foreach("|git rev-list --max-count=1 #{minimum.shellescape} --") do |line|
-            min_hash = line.strip
-          end
+    if minimum
+      # Get the commit hash for the lower bound
+      min_hash = nil
+      git_min_hash_cmd = "git rev-list --max-count=1 #{minimum.shellescape} --"
+      IO.foreach("|#{git_min_hash_cmd}") do |line|
+        min_hash = line.strip
+      end
 
-          # If not found or string is invalid, nothing else to do
-          next if !min_hash or !git_check_ref_format(min_hash)
+      # If not found, nothing else to do
+      if !min_hash
+        logger.warn "no refs found looking for min_hash: `GIT_DIR=#{gitdir} #{git_min_hash_cmd}` returned no output"
+        return []
+      end
 
-          # Now find all commits between them
-          IO.foreach("|git rev-list #{min_hash.shellescape}..#{max_hash.shellescape} --") do |line|
-            hash = line.strip
-            commits.push(hash) if !resolved_exclude or !resolved_exclude.include? hash
-          end
+      # If string is invalid, nothing else to do
+      if !git_check_ref_format(min_hash)
+        logger.warn "ref returned by `GIT_DIR=#{gitdir} #{git_min_hash_cmd}` was invalid for min_hash: #{min_hash}"
+        return []
+      end
 
-          commits.push(min_hash) if !resolved_exclude or !resolved_exclude.include? min_hash
-        else
-          commits.push(max_hash) if !resolved_exclude or !resolved_exclude.include? max_hash
-        end
-      else
-        logger.warn "Repository #{r.name} exists in table but not found on disk"
+      # Now find all commits between them
+      IO.foreach("|git rev-list #{min_hash.shellescape}..#{max_hash.shellescape} --") do |line|
+        hash = line.strip
+        commits.push(hash) if !resolved_exclude or !resolved_exclude.include? hash
       end
-    end
 
-    if !commits or commits.empty?
-      nil
+      commits.push(min_hash) if !resolved_exclude or !resolved_exclude.include? min_hash
     else
-      commits
+      commits.push(max_hash) if !resolved_exclude or !resolved_exclude.include? max_hash
     end
+
+    commits
   end
 
-  # Import all commits from configured git directory into the commits
-  # database.
-
-  def self.import_all
-    repositories.each do |repo_name, repo|
-      stat = { true => 0, false => 0 }
-      ENV['GIT_DIR'] = repo[:git_dir]
-      IO.foreach("|git rev-list --format=oneline --all") do |line|
-        sha1, message = line.strip.split " ", 2
-        imported = false
-        Commit.find_or_create_by_repository_name_and_sha1_and_message(repo_name, sha1, message[0..254]) do
-          imported = true
-        end
-        stat[!!imported] += 1
-        if (stat[true] + stat[false]) % 100 == 0
-          if $stdout.tty? or ARGV[0] == '-v'
-            puts "#{$0} #{$$}: repo #{repo_name} add #{stat[true]} skip #{stat[false]}"
-          end
-        end
-      end
-      if $stdout.tty? or ARGV[0] == '-v'
-        puts "#{$0} #{$$}: repo #{repo_name} add #{stat[true]} skip #{stat[false]}"
-      end
+  # Given a repository (url, or name of hosted repo) and commit sha1,
+  # copy the commit into the internal git repo and tag it with the
+  # given tag (typically a job UUID).
+  #
+  # The repo can be a remote url, but in this case sha1 must already
+  # be present in our local cache for that repo: e.g., sha1 was just
+  # returned by find_commit_range.
+  def self.tag_in_internal_repository repo_name, sha1, tag
+    unless git_check_ref_format tag
+      raise ArgumentError.new "invalid tag #{tag}"
     end
+    unless /^[0-9a-f]{40}$/ =~ sha1
+      raise ArgumentError.new "invalid sha1 #{sha1}"
+    end
+    src_gitdir, _ = git_dir_for repo_name
+    unless src_gitdir
+      raise ArgumentError.new "no local repository for #{repo_name}"
+    end
+    dst_gitdir = Rails.configuration.git_internal_dir
+    must_pipe("echo #{sha1.shellescape}",
+              "git --git-dir #{src_gitdir.shellescape} pack-objects -q --revs --stdout",
+              "git --git-dir #{dst_gitdir.shellescape} unpack-objects -q")
+    must_git(dst_gitdir,
+             "tag --force #{tag.shellescape} #{sha1.shellescape}")
   end
 
-  def self.refresh_repositories
-    @repositories = nil
+  protected
+
+  def self.remote_url? repo_name
+    /^(https?|git):\/\// =~ repo_name
   end
 
-  protected
+  # Return [local_git_dir, is_remote]. If is_remote, caller must use
+  # fetch_remote_repository to ensure content is up-to-date.
+  #
+  # Raises an exception if the latest content could not be fetched for
+  # any reason.
+  def self.git_dir_for repo_name
+    if remote_url? repo_name
+      return [cache_dir_for(repo_name), true]
+    end
+    repos = Repository.readable_by(current_user).where(name: repo_name)
+    if repos.count == 0
+      raise ArgumentError.new "Repository not found: '#{repo_name}'"
+    elsif repos.count > 1
+      logger.error "Multiple repositories with name=='#{repo_name}'!"
+      raise ArgumentError.new "Name conflict"
+    else
+      return [repos.first.server_path, false]
+    end
+  end
+
+  def self.cache_dir_for git_url
+    File.join(cache_dir_base, Digest::SHA1.hexdigest(git_url) + ".git").to_s
+  end
 
-  def self.repositories
-    return @repositories if @repositories
+  def self.cache_dir_base
+    Rails.root.join 'tmp', 'git'
+  end
 
-    @repositories = {}
-    @gitdirbase = Rails.configuration.git_repositories_dir
-    Dir.foreach @gitdirbase do |repo|
-      next if repo.match /^\./
-      git_dir = File.join(@gitdirbase,
-                          repo.match(/\.git$/) ? repo : File.join(repo, '.git'))
-      next if git_dir == Rails.configuration.git_internal_dir
-      repo_name = repo.sub(/\.git$/, '')
-      @repositories[repo_name] = {git_dir: git_dir}
+  def self.fetch_remote_repository gitdir, git_url
+    # Caller decides which protocols are worth using. This is just a
+    # safety check to ensure we never use urls like "--flag" or wander
+    # into git's hardlink features by using bare "/path/foo" instead
+    # of "file:///path/foo".
+    unless /^[a-z]+:\/\// =~ git_url
+      raise ArgumentError.new "invalid git url #{git_url}"
+    end
+    begin
+      must_git gitdir, "branch"
+    rescue GitError => e
+      raise unless /Not a git repository/ =~ e.to_s
+      # OK, this just means we need to create a blank cache repository
+      # before fetching.
+      FileUtils.mkdir_p gitdir
+      must_git gitdir, "init"
     end
+    must_git(gitdir,
+             "fetch --no-progress --tags --prune --force --update-head-ok #{git_url.shellescape} 'refs/heads/*:refs/heads/*'")
+  end
 
-    @repositories
+  def self.must_git gitdir, *cmds
+    # Clear token in case a git helper tries to use it as a password.
+    orig_token = ENV['ARVADOS_API_TOKEN']
+    ENV['ARVADOS_API_TOKEN'] = ''
+    begin
+      git = "git --git-dir #{gitdir.shellescape}"
+      cmds.each do |cmd|
+        must_pipe git+" "+cmd
+      end
+    ensure
+      ENV['ARVADOS_API_TOKEN'] = orig_token
+    end
+  end
+
+  def self.must_pipe *cmds
+    cmd = cmds.join(" 2>&1 |") + " 2>&1"
+    out = IO.read("| </dev/null #{cmd}")
+    if not $?.success?
+      raise GitError.new "#{cmd}: #{$?}: #{out}"
+    end
   end
 end
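
A minimal standalone sketch (plain Ruby; paths, sha1, and tag are hypothetical)
of the shell pipeline that tag_in_internal_repository builds via must_pipe:
pack the commit's objects out of the source repo, unpack them into the
internal repo, then tag the commit there.

    require 'shellwords'  # adds String#shellescape

    src  = '/var/lib/arvados/git/repositories/example.git'  # hypothetical
    dst  = '/var/lib/arvados/internal.git'                  # git_internal_dir
    sha1 = '0' * 40                                         # placeholder hash
    tag  = 'zzzzz-8i9sb-0123456789abcde'                    # e.g., a job UUID

    # Same pipe construction as must_pipe above.
    cmd = ["echo #{sha1.shellescape}",
           "git --git-dir #{src.shellescape} pack-objects -q --revs --stdout",
           "git --git-dir #{dst.shellescape} unpack-objects -q",
          ].join(" 2>&1 |") + " 2>&1"
    out = IO.read("| </dev/null #{cmd}")
    raise "#{cmd}: #{$?}: #{out}" unless $?.success?
    system("git --git-dir #{dst.shellescape} tag --force " \
           "#{tag.shellescape} #{sha1.shellescape}")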
index 01df069f32f90ae2cc4dd7955c76b6f7b9c572c0..6c24293334f6d4cc5af371b1b2b9a0d370466530 100644 (file)
@@ -11,11 +11,12 @@ class Job < ArvadosModel
   before_validation :set_priority
   before_validation :update_state_from_old_state_attrs
   validate :ensure_script_version_is_commit
-  validate :find_arvados_sdk_version
   validate :find_docker_image_locator
+  validate :find_arvados_sdk_version
   validate :validate_status
   validate :validate_state_change
   validate :ensure_no_collection_uuids_in_script_params
+  before_save :tag_version_in_internal_repository
   before_save :update_timestamps_when_state_changes
 
   has_many :commit_ancestors, :foreign_key => :descendant, :primary_key => :script_version
@@ -124,21 +125,43 @@ class Job < ArvadosModel
   end
 
   def ensure_script_version_is_commit
-    if self.state == Running
+    if state == Running
       # Apparently client has already decided to go for it. This is
       # needed to run a local job using a local working directory
       # instead of a commit-ish.
       return true
     end
-    if new_record? or script_version_changed?
-      sha1 = Commit.find_commit_range(current_user, self.repository, nil, self.script_version, nil)[0] rescue nil
-      if sha1
-        self.supplied_script_version = self.script_version if self.supplied_script_version.nil? or self.supplied_script_version.empty?
-        self.script_version = sha1
-      else
-        self.errors.add :script_version, "#{self.script_version} does not resolve to a commit"
+    if new_record? or repository_changed? or script_version_changed?
+      sha1 = Commit.find_commit_range(repository,
+                                      nil, script_version, nil).first
+      if not sha1
+        errors.add :script_version, "#{script_version} does not resolve to a commit"
         return false
       end
+      if supplied_script_version.nil? or supplied_script_version.empty?
+        self.supplied_script_version = script_version
+      end
+      self.script_version = sha1
+    end
+    true
+  end
+
+  def tag_version_in_internal_repository
+    if state == Running
+      # No point now. See ensure_script_version_is_commit.
+      true
+    elsif errors.any?
+      # Won't be saved, and script_version might not even be valid.
+      true
+    elsif new_record? or repository_changed? or script_version_changed?
+      uuid_was = uuid
+      begin
+        assign_uuid
+        Commit.tag_in_internal_repository repository, script_version, uuid
+      rescue
+        uuid = uuid_was
+        raise
+      end
     end
   end
 
@@ -169,9 +192,9 @@ class Job < ArvadosModel
   def find_arvados_sdk_version
     resolve_runtime_constraint("arvados_sdk_version",
                                :arvados_sdk_version) do |git_search|
-      commits = Commit.find_commit_range(current_user, "arvados",
+      commits = Commit.find_commit_range("arvados",
                                          nil, git_search, nil)
-      if commits.nil? or commits.empty?
+      if commits.empty?
         [false, "#{git_search} does not resolve to a commit"]
       elsif not runtime_constraints["docker_image"]
         [false, "cannot be specified without a Docker image constraint"]
@@ -182,6 +205,10 @@ class Job < ArvadosModel
   end
 
   def find_docker_image_locator
+    if (runtime_constraints.is_a? Hash) and
+        runtime_constraints['docker_image'].nil? and
+        Rails.configuration.default_docker_image_for_jobs
+      runtime_constraints['docker_image'] =
+        Rails.configuration.default_docker_image_for_jobs
+    end
     resolve_runtime_constraint("docker_image",
                                :docker_image_locator) do |image_search|
       image_tag = runtime_constraints['docker_image_tag']
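
A sketch of the resolution step ensure_script_version_is_commit performs
(method name taken from the diff above; repository and commit-ish values are
hypothetical):

    # Resolve a commit-ish (branch, tag, or abbreviated hash) to a full
    # 40-character sha1 before the job record is saved.
    repository     = 'alice/pipeline'  # hypothetical
    script_version = 'master'

    sha1 = Commit.find_commit_range(repository, nil, script_version, nil).first
    raise "#{script_version} does not resolve to a commit" unless sha1
    supplied_script_version = script_version  # remember what was asked for
    script_version = sha1                     # pin the job to one commit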
index 3baf0983d935a387dfa30bad3aed1115c1fbf065..6854ed2625218c462f786ab960268a8be7708910 100644 (file)
@@ -8,6 +8,7 @@ class KeepService < ArvadosModel
     t.add  :service_port
     t.add  :service_ssl_flag
     t.add  :service_type
+    t.add  :read_only
   end
   api_accessible :superuser, :extend => :user do |t|
   end
index bf27f6ff99104879da00019bd09bd7f975fbfd74..abb46fdc661128f5321a55b186d54afd142ed5f3 100644 (file)
@@ -13,15 +13,6 @@ class Node < ArvadosModel
   belongs_to(:job, foreign_key: :job_uuid, primary_key: :uuid)
   attr_accessor :job_readable
 
-  MAX_SLOTS = 64
-
-  @@dns_server_conf_dir = Rails.configuration.dns_server_conf_dir
-  @@dns_server_conf_template = Rails.configuration.dns_server_conf_template
-  @@dns_server_reload_command = Rails.configuration.dns_server_reload_command
-  @@uuid_prefix = Rails.configuration.uuid_prefix
-  @@domain = Rails.configuration.compute_node_domain rescue `hostname --domain`.strip
-  @@nameservers = Rails.configuration.compute_node_nameservers
-
   api_accessible :user, :extend => :common do |t|
     t.add :hostname
     t.add :domain
@@ -36,11 +27,11 @@ class Node < ArvadosModel
   api_accessible :superuser, :extend => :user do |t|
     t.add :first_ping_at
     t.add :info
-    t.add lambda { |x| @@nameservers }, :as => :nameservers
+    t.add lambda { |x| Rails.configuration.compute_node_nameservers }, :as => :nameservers
   end
 
   def domain
-    super || @@domain
+    super || Rails.configuration.compute_node_domain
   end
 
   def api_job_uuid
@@ -103,7 +94,7 @@ class Node < ArvadosModel
       end
     end
 
-    # Assign hostname
+    # Assign slot_number
     if self.slot_number.nil?
       try_slot = 0
       begin
@@ -114,8 +105,12 @@ class Node < ArvadosModel
         rescue ActiveRecord::RecordNotUnique
           try_slot += 1
         end
-        raise "No available node slots" if try_slot == MAX_SLOTS
+        raise "No available node slots" if try_slot == Rails.configuration.max_compute_nodes
       end while true
+    end
+
+    # Assign hostname
+    if self.hostname.nil? and Rails.configuration.assign_node_hostname
       self.hostname = self.class.hostname_for_slot(self.slot_number)
     end
 
@@ -156,45 +151,77 @@ class Node < ArvadosModel
     end
   end
 
-  def self.dns_server_update(hostname, ip_address)
-    return unless @@dns_server_conf_dir and @@dns_server_conf_template
+  def self.dns_server_update hostname, ip_address
+    ok = true
+
     ptr_domain = ip_address.
       split('.').reverse.join('.').concat('.in-addr.arpa')
-    hostfile = File.join @@dns_server_conf_dir, "#{hostname}.conf"
 
-    begin
-      template = IO.read(@@dns_server_conf_template)
-    rescue => e
-      STDERR.puts "Unable to read dns_server_conf_template #{@@dns_server_conf_template}: #{e.message}"
-      return
-    end
+    template_vars = {
+      hostname: hostname,
+      uuid_prefix: Rails.configuration.uuid_prefix,
+      ip_address: ip_address,
+      ptr_domain: ptr_domain,
+    }
+
+    if Rails.configuration.dns_server_conf_dir and Rails.configuration.dns_server_conf_template
+      begin
+        begin
+          template = IO.read(Rails.configuration.dns_server_conf_template)
+        rescue => e
+          logger.error "Reading #{Rails.configuration.dns_server_conf_template}: #{e.message}"
+          raise
+        end
 
-    populated = template % {hostname:hostname, uuid_prefix:@@uuid_prefix, ip_address:ip_address, ptr_domain:ptr_domain}
+        hostfile = File.join Rails.configuration.dns_server_conf_dir, "#{hostname}.conf"
+        File.open hostfile+'.tmp', 'w' do |f|
+          f.puts template % template_vars
+        end
+        File.rename hostfile+'.tmp', hostfile
+      rescue => e
+        logger.error "Writing #{hostfile}: #{e.message}"
+        ok = false
+      end
+    end
 
-    begin
-      File.open hostfile, 'w' do |f|
-        f.puts populated
+    if Rails.configuration.dns_server_update_command
+      cmd = Rails.configuration.dns_server_update_command % template_vars
+      if not system cmd
+        logger.error "dns_server_update_command #{cmd.inspect} failed: #{$?}"
+        ok = false
       end
-    rescue => e
-      STDERR.puts "Unable to write #{hostfile}: #{e.message}"
-      return
     end
-    File.open(File.join(@@dns_server_conf_dir, 'restart.txt'), 'w') do |f|
-      # this will trigger a dns server restart
-      f.puts @@dns_server_reload_command
+
+    if Rails.configuration.dns_server_conf_dir and Rails.configuration.dns_server_reload_command
+      restartfile = File.join(Rails.configuration.dns_server_conf_dir, 'restart.txt')
+      begin
+        File.open(restartfile, 'w') do |f|
+          # Typically, this is used to trigger a dns server restart
+          f.puts Rails.configuration.dns_server_reload_command
+        end
+      rescue => e
+        logger.error "Unable to write #{restartfile}: #{e.message}"
+        ok = false
+      end
     end
+
+    ok
   end
 
   def self.hostname_for_slot(slot_number)
-    "compute#{slot_number}"
+    config = Rails.configuration.assign_node_hostname
+
+    return nil if !config
+
+    sprintf(config, {:slot_number => slot_number})
   end
 
   # At startup, make sure all DNS entries exist.  Otherwise, slurmctld
   # will refuse to start.
-  if @@dns_server_conf_dir and @@dns_server_conf_template
-    (0..MAX_SLOTS-1).each do |slot_number|
+  if Rails.configuration.dns_server_conf_dir and Rails.configuration.dns_server_conf_template and Rails.configuration.assign_node_hostname
+    (0..Rails.configuration.max_compute_nodes-1).each do |slot_number|
       hostname = hostname_for_slot(slot_number)
-      hostfile = File.join @@dns_server_conf_dir, "#{hostname}.conf"
+      hostfile = File.join Rails.configuration.dns_server_conf_dir, "#{hostname}.conf"
       if !File.exists? hostfile
         n = Node.where(:slot_number => slot_number).first
         if n.nil? or n.ip_address.nil?
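
The %{...} placeholders in dns_server_conf_template and
dns_server_update_command are filled in with Ruby's String#%, and
hostname_for_slot uses sprintf's %<name> references. A small illustration
(values hypothetical):

    template_vars = {
      hostname: 'compute0',                # hypothetical node
      uuid_prefix: 'zzzzz',
      ip_address: '10.0.0.1',
      ptr_domain: '1.0.0.10.in-addr.arpa',
    }

    # String#% substitutes %{...} keys from a hash:
    'local_data %{hostname} IN A %{ip_address}' % template_vars
    # => "local_data compute0 IN A 10.0.0.1"

    # hostname_for_slot relies on sprintf's named references:
    sprintf('compute%<slot_number>04d', slot_number: 9)  # => "compute0009"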
index f159b48bdacd62ca841ade0f42a394163d5bd084..f361a49db5dcd49b649d7e7f79c255e214eae97a 100644 (file)
@@ -3,26 +3,113 @@ class Repository < ArvadosModel
   include KindAndEtag
   include CommonApiTemplate
 
+  # Order is important here.  We must validate the owner before we can
+  # validate the name.
+  validate :valid_owner
+  validate :name_format, :if => Proc.new { |r| r.errors[:owner_uuid].empty? }
+  validates(:name, uniqueness: true, allow_nil: false)
+
   api_accessible :user, extend: :common do |t|
     t.add :name
     t.add :fetch_url
     t.add :push_url
+    t.add :clone_urls
+  end
+
+  def self.attributes_required_columns
+    super.merge("clone_urls" => ["name"],
+                "fetch_url" => ["name"],
+                "push_url" => ["name"])
   end
 
+  # Deprecated. Use clone_urls instead.
   def push_url
-    super || self.name && "git@git.#{Rails.configuration.uuid_prefix}.arvadosapi.com:#{self.name}.git"
+    ssh_clone_url
   end
 
+  # Deprecated. Use clone_urls instead.
   def fetch_url
-    super || push_url
+    ssh_clone_url
   end
 
-  protected
+  def clone_urls
+    [ssh_clone_url, https_clone_url].compact
+  end
 
-  def permission_to_create
-    current_user and current_user.is_admin
+  def server_path
+    # Find where the repository is stored on the API server's filesystem,
+    # and return that path, or nil if not found.
+    # This method is only for the API server's internal use, and should not
+    # be exposed through the public API.  Following our current gitolite
+    # setup, it searches for repositories stored by UUID, then name; and it
+    # prefers bare repositories over checkouts.
+    [["%s.git"], ["%s", ".git"]].each do |repo_base, *join_args|
+      [:uuid, :name].each do |path_attr|
+        git_dir = File.join(Rails.configuration.git_repositories_dir,
+                            repo_base % send(path_attr), *join_args)
+        return git_dir if File.exist?(git_dir)
+      end
+    end
+    nil
   end
+
+  protected
+
   def permission_to_update
-    current_user and current_user.is_admin
+    if not super
+      false
+    elsif current_user.is_admin
+      true
+    elsif name_changed?
+      current_user.uuid == owner_uuid
+    else
+      true
+    end
+  end
+
+  def owner
+    User.find_by_uuid(owner_uuid)
+  end
+
+  def valid_owner
+    if owner.nil? or (owner.username.nil? and (owner.uuid != system_user_uuid))
+      errors.add(:owner_uuid, "must refer to a user with a username")
+      false
+    end
+  end
+
+  def name_format
+    if owner.uuid == system_user_uuid
+      prefix_match = ""
+      errmsg_start = "must be"
+    else
+      prefix_match = Regexp.escape(owner.username + "/")
+      errmsg_start = "must be the owner's username, then '/', then"
+    end
+    if not /^#{prefix_match}[A-Za-z][A-Za-z0-9]*$/.match(name)
+      errors.add(:name,
+                 "#{errmsg_start} a letter followed by alphanumerics")
+      false
+    end
+  end
+
+  def ssh_clone_url
+    _clone_url :git_repo_ssh_base, 'git@git.%s.arvadosapi.com:'
+  end
+
+  def https_clone_url
+    _clone_url :git_repo_https_base, 'https://git.%s.arvadosapi.com/'
+  end
+
+  def _clone_url config_var, default_base_fmt
+    configured_base = Rails.configuration.send config_var
+    return nil if configured_base == false
+    prefix = new_record? ? Rails.configuration.uuid_prefix : uuid[0,5]
+    if prefix == Rails.configuration.uuid_prefix and configured_base != true
+      base = configured_base
+    else
+      base = default_base_fmt % prefix
+    end
+    '%s%s.git' % [base, name]
   end
 end
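
A simplified standalone version of the clone-URL logic above; it omits
_clone_url's remote-cluster check (comparing the record's uuid prefix to the
local uuid_prefix), and all values are hypothetical:

    def clone_url(configured_base, default_base_fmt, uuid_prefix, repo_name)
      return nil if configured_base == false  # clone URLs not advertised
      if configured_base == true
        base = default_base_fmt % uuid_prefix  # use the default pattern
      else
        base = configured_base                 # site-configured base
      end
      '%s%s.git' % [base, repo_name]
    end

    clone_url(true, 'git@git.%s.arvadosapi.com:', 'zzzzz', 'alice/repo')
    # => "git@git.zzzzz.arvadosapi.com:alice/repo.git"
    clone_url('https://git.example.com/', nil, 'zzzzz', 'alice/repo')
    # => "https://git.example.com/alice/repo.git"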
index a47a4583cd533e272d9b5f850b4281aef75f26c3..2200d050990809d04f3e5fdbe088a1af7621ba01 100644 (file)
@@ -8,19 +8,42 @@ class User < ArvadosModel
 
   serialize :prefs, Hash
   has_many :api_client_authorizations
+  validates(:username,
+            format: {
+              with: /^[A-Za-z][A-Za-z0-9]*$/,
+              message: "must begin with a letter and contain only alphanumerics",
+            },
+            uniqueness: true,
+            allow_nil: true)
   before_update :prevent_privilege_escalation
   before_update :prevent_inactive_admin
+  before_update :verify_repositories_empty, :if => Proc.new { |user|
+    user.username.nil? and user.username_changed?
+  }
   before_create :check_auto_admin
+  before_create :set_initial_username, :if => Proc.new { |user|
+    user.username.nil? and user.email
+  }
   after_create :add_system_group_permission_link
-  after_create :auto_setup_new_user
+  after_create :auto_setup_new_user, :if => Proc.new { |user|
+    Rails.configuration.auto_setup_new_users and
+    (user.uuid != system_user_uuid) and
+    (user.uuid != anonymous_user_uuid)
+  }
   after_create :send_admin_notifications
   after_update :send_profile_created_notification
-
+  after_update :sync_repository_names, :if => Proc.new { |user|
+    (user.uuid != system_user_uuid) and
+    user.username_changed? and
+    (not user.username_was.nil?)
+  }
 
   has_many :authorized_keys, :foreign_key => :authorized_user_uuid, :primary_key => :uuid
+  has_many :repositories, foreign_key: :owner_uuid, primary_key: :uuid
 
   api_accessible :user, extend: :common do |t|
     t.add :email
+    t.add :username
     t.add :full_name
     t.add :first_name
     t.add :last_name
@@ -171,7 +194,7 @@ class User < ArvadosModel
   def setup_repo_vm_links(repo_name, vm_uuid, openid_prefix)
     oid_login_perm = create_oid_login_perm openid_prefix
     repo_perm = create_user_repo_link repo_name
-    vm_login_perm = create_vm_login_permission_link vm_uuid, repo_name
+    vm_login_perm = create_vm_login_permission_link vm_uuid, username
     group_perm = create_user_group_link
 
     return [oid_login_perm, repo_perm, vm_login_perm, group_perm, self].compact
@@ -222,9 +245,13 @@ class User < ArvadosModel
   end
 
   def permission_to_update
-    # users must be able to update themselves (even if they are
-    # inactive) in order to create sessions
-    self == current_user or super
+    if username_changed?
+      current_user.andand.is_admin
+    else
+      # users must be able to update themselves (even if they are
+      # inactive) in order to create sessions
+      self == current_user or super
+    end
   end
 
   def permission_to_create
@@ -237,13 +264,62 @@ class User < ArvadosModel
     return if self.uuid.end_with?('anonymouspublic')
     if (User.where("email = ?",self.email).where(:is_admin => true).count == 0 and
         Rails.configuration.auto_admin_user and self.email == Rails.configuration.auto_admin_user) or
-       (User.where("uuid not like '%-000000000000000'").where(:is_admin => true).count == 0 and 
+       (User.where("uuid not like '%-000000000000000'").where(:is_admin => true).count == 0 and
         Rails.configuration.auto_admin_first_user)
       self.is_admin = true
       self.is_active = true
     end
   end
 
+  def find_usable_username_from(basename)
+    # If "basename" is a usable username, return that.
+    # Otherwise, find a unique username "basenameN", where N is the
+    # smallest integer greater than 1, and return that.
+    # Return nil if a unique username can't be found after reasonable
+    # searching.
+    quoted_name = self.class.connection.quote_string(basename)
+    next_username = basename
+    next_suffix = 1
+    while Rails.configuration.auto_setup_name_blacklist.include?(next_username)
+      next_suffix += 1
+      next_username = "%s%i" % [basename, next_suffix]
+    end
+    0.upto(6).each do |suffix_len|
+      pattern = "%s%s" % [quoted_name, "_" * suffix_len]
+      self.class.
+          where("username like '#{pattern}'").
+          select(:username).
+          order(username: :asc).
+          find_each do |other_user|
+        if other_user.username > next_username
+          break
+        elsif other_user.username == next_username
+          next_suffix += 1
+          next_username = "%s%i" % [basename, next_suffix]
+        end
+      end
+      return next_username if (next_username.size <= pattern.size)
+    end
+    nil
+  end
+
+  def set_initial_username
+    email_parts = email.partition("@")
+    local_parts = email_parts.first.partition("+")
+    if email_parts.any?(&:empty?)
+      return
+    elsif not local_parts.first.empty?
+      base_username = local_parts.first
+    else
+      base_username = email_parts.first
+    end
+    base_username.sub!(/^[^A-Za-z]+/, "")
+    base_username.gsub!(/[^A-Za-z0-9]/, "")
+    unless base_username.empty?
+      self.username = find_usable_username_from(base_username)
+    end
+  end
+
   def prevent_privilege_escalation
     if current_user.andand.is_admin
       return true
@@ -321,80 +397,45 @@ class User < ArvadosModel
       return
     end
 
-    # Check for an existing repository with the same name we're about to use.
-    repo = Repository.where(name: repo_name).first
-
-    if repo
-      logger.warn "Repository exists for #{repo_name}: #{repo[:uuid]}."
-
-      # Look for existing repository access for this repo
-      repo_perms = Link.where(tail_uuid: self.uuid,
-                              head_uuid: repo[:uuid],
-                              link_class: 'permission',
-                              name: 'can_manage')
-      if repo_perms.any?
-        logger.warn "User already has repository access " +
-            repo_perms.collect { |p| p[:uuid] }.inspect
-        return repo_perms.first
-      end
-    end
-
-    # create repo, if does not already exist
-    repo ||= Repository.create(name: repo_name)
+    repo = Repository.where(owner_uuid: uuid, name: repo_name).first_or_create!
     logger.info { "repo uuid: " + repo[:uuid] }
-
-    repo_perm = Link.create(tail_uuid: self.uuid,
-                            head_uuid: repo[:uuid],
-                            link_class: 'permission',
-                            name: 'can_manage')
+    repo_perm = Link.where(tail_uuid: uuid, head_uuid: repo.uuid,
+                           link_class: "permission",
+                           name: "can_manage").first_or_create!
     logger.info { "repo permission: " + repo_perm[:uuid] }
     return repo_perm
   end
 
   # create login permission for the given vm_uuid, if it does not already exist
   def create_vm_login_permission_link(vm_uuid, repo_name)
-    begin
-
-      # vm uuid is optional
-      if vm_uuid
-        vm = VirtualMachine.where(uuid: vm_uuid).first
+    # vm uuid is optional
+    if vm_uuid
+      vm = VirtualMachine.where(uuid: vm_uuid).first
 
-        if not vm
-          logger.warn "Could not find virtual machine for #{vm_uuid.inspect}"
-          raise "No vm found for #{vm_uuid}"
-        end
-      else
-        return
+      if not vm
+        logger.warn "Could not find virtual machine for #{vm_uuid.inspect}"
+        raise "No vm found for #{vm_uuid}"
       end
+    else
+      return
+    end
 
-      logger.info { "vm uuid: " + vm[:uuid] }
+    logger.info { "vm uuid: " + vm[:uuid] }
+    login_attrs = {
+      tail_uuid: uuid, head_uuid: vm.uuid,
+      link_class: "permission", name: "can_login",
+    }
 
-      login_perms = Link.where(tail_uuid: self.uuid,
-                              head_uuid: vm[:uuid],
-                              link_class: 'permission',
-                              name: 'can_login')
+    login_perm = Link.
+      where(login_attrs).
+      select { |link| link.properties["username"] == repo_name }.
+      first
 
-      perm_exists = false
-      login_perms.each do |perm|
-        if perm.properties['username'] == repo_name
-          perm_exists = perm
-          break
-        end
-      end
-
-      if perm_exists
-        login_perm = perm_exists
-      else
-        login_perm = Link.create(tail_uuid: self.uuid,
-                                 head_uuid: vm[:uuid],
-                                 link_class: 'permission',
-                                 name: 'can_login',
-                                 properties: {'username' => repo_name})
-        logger.info { "login permission: " + login_perm[:uuid] }
-      end
+    login_perm ||= Link.
+      create(login_attrs.merge(properties: {"username" => repo_name}))
 
-      return login_perm
-    end
+    logger.info { "login permission: " + login_perm[:uuid] }
+    login_perm
   end
 
   # add the user to the 'All users' group
@@ -431,45 +472,18 @@ class User < ArvadosModel
 
   # Automatically setup new user during creation
   def auto_setup_new_user
-    return true if !Rails.configuration.auto_setup_new_users
-    return true if !self.email
-    return true if self.uuid == system_user_uuid
-    return true if self.uuid == anonymous_user_uuid
-
-    if Rails.configuration.auto_setup_new_users_with_vm_uuid ||
-       Rails.configuration.auto_setup_new_users_with_repository
-      username = self.email.partition('@')[0] if self.email
-      return true if !username
-
-      blacklisted_usernames = Rails.configuration.auto_setup_name_blacklist
-      if blacklisted_usernames.include?(username)
-        return true
-      elsif !(/^[a-zA-Z][-._a-zA-Z0-9]{0,30}[a-zA-Z0-9]$/.match(username))
-        return true
-      else
-        return true if !(username = derive_unique_username username)
+    setup_repo_vm_links(nil, nil, Rails.configuration.default_openid_prefix)
+    if username
+      create_vm_login_permission_link(Rails.configuration.auto_setup_new_users_with_vm_uuid,
+                                      username)
+      repo_name = "#{username}/#{username}"
+      if Rails.configuration.auto_setup_new_users_with_repository and
+          Repository.where(name: repo_name).first.nil?
+        repo = Repository.create!(name: repo_name, owner_uuid: uuid)
+        Link.create!(tail_uuid: uuid, head_uuid: repo.uuid,
+                     link_class: "permission", name: "can_manage")
       end
     end
-
-    # setup user
-    setup_repo_vm_links(username,
-                        Rails.configuration.auto_setup_new_users_with_vm_uuid,
-                        Rails.configuration.default_openid_prefix)
-  end
-
-  # Find a username that starts with the given string and does not collide
-  # with any existing repository name or VM login name
-  def derive_unique_username username
-    while true
-      if Repository.where(name: username).empty?
-        login_collisions = Link.where(link_class: 'permission',
-                                      name: 'can_login').select do |perm|
-          perm.properties['username'] == username
-        end
-        return username if login_collisions.empty?
-      end
-      username = username + SecureRandom.random_number(10).to_s
-    end
   end
 
   # Send notification if the user saved profile for the first time
@@ -482,4 +496,19 @@ class User < ArvadosModel
     end
   end
 
+  def verify_repositories_empty
+    unless repositories.first.nil?
+      errors.add(:username, "can't be unset when the user owns repositories")
+      false
+    end
+  end
+
+  def sync_repository_names
+    old_name_re = /^#{Regexp.escape(username_was)}\//
+    name_sub = "#{username}/"
+    repositories.find_each do |repo|
+      repo.name = repo.name.sub(old_name_re, name_sub)
+      repo.save!
+    end
+  end
 end
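
A trace of the username derivation set_initial_username performs on the
email address's local part (sample addresses hypothetical):

    def base_username_for(email)
      local, = email.partition('@')
      base, = local.partition('+')   # drop any +suffix from the local part
      base = local if base.empty?
      base.sub(/^[^A-Za-z]+/, '').gsub(/[^A-Za-z0-9]/, '')
    end

    base_username_for('jane.doe+arvados@example.com')  # => "janedoe"
    base_username_for('4ndy@example.com')              # => "ndy"

find_usable_username_from then appends the smallest integer suffix needed to
steer clear of auto_setup_name_blacklist and any existing usernames.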
index 830942c897b0407a0883fbbfc791a7d41e502acf..35f4f100fb4021ff4c70f1e5c63427c606fb6ce8 100644 (file)
@@ -6,7 +6,7 @@ $(function(){
 
 
 <div id="intropage">
-  <img class="curoverse-logo" src="<%= asset_path('logo.png') %>" style="display:block; margin:2em auto"/>
+  <img class="curoverse-logo" src="<%= asset_path('logo.png') rescue '/logo.png' %>" style="display:block; margin:2em auto"/>
   <div style="width:30em; margin:2em auto 0 auto">
 
     <h1>Error</h1>
index 5d8c9e7d25fa3c0b5896b5de92a7b004cdcd76a3..2500bf13c20deeed19c30207e51eeef2c04cb0a4 100644 (file)
@@ -4,10 +4,11 @@
 Hi there,
 <% end -%>
 
-Your Arvados account has been set up. You can log in with your Google account
-associated with the e-mail address <%= @user.email %><% if Rails.configuration.workbench_address %> at:
+Your Arvados shell account has been set up. Please visit the virtual machines page <% if Rails.configuration.workbench_address %>at
 
-  <%= Rails.configuration.workbench_address %><% else %>.<% end %>
+  <%= Rails.configuration.workbench_address %><%= "/" if !Rails.configuration.workbench_address.end_with?("/") %>users/<%= @user.uuid%>/virtual_machines <% else %><% end %>
+
+for connection instructions.
 
 Thanks,
 The Arvados team.
index 8ecef119a708c044e10c1f3689d1756eddd0a06d..51bc4f98f3b89f47e9dbb0f669a5a458f0fc6a55 100644 (file)
@@ -1,61 +1,39 @@
 # Do not use this file for site configuration. Create application.yml
 # instead (see application.yml.example).
-
-development:
-  force_ssl: false
-  cache_classes: false
-  whiny_nils: true
-  consider_all_requests_local: true
-  action_controller.perform_caching: false
-  action_mailer.raise_delivery_errors: false
-  action_mailer.perform_deliveries: false
-  active_support.deprecation: :log
-  action_dispatch.best_standards_support: :builtin
-  active_record.mass_assignment_sanitizer: :strict
-  active_record.auto_explain_threshold_in_seconds: 0.5
-  assets.compress: false
-  assets.debug: true
-  local_modified: "<%= '-modified' if `git status -s` != '' %>"
-
-production:
-  force_ssl: true
-  cache_classes: true
-  consider_all_requests_local: false
-  action_controller.perform_caching: true
-  serve_static_assets: false
-  assets.compress: true
-  assets.compile: false
-  assets.digest: true
-
-test:
-  force_ssl: false
-  cache_classes: true
-  serve_static_assets: true
-  static_cache_control: public, max-age=3600
-  whiny_nils: true
-  consider_all_requests_local: true
-  action_controller.perform_caching: false
-  action_dispatch.show_exceptions: false
-  action_controller.allow_forgery_protection: false
-  action_mailer.delivery_method: :test
-  active_support.deprecation: :stderr
-  active_record.mass_assignment_sanitizer: :strict
-  uuid_prefix: zzzzz
-  secret_token: <%= rand(2**512).to_s(36) %>
-  blob_signing_key: zfhgfenhffzltr9dixws36j1yhksjoll2grmku38mi7yxd66h5j4q9w4jzanezacp8s6q0ro3hxakfye02152hncy6zml2ed0uc
-  user_profile_notification_address: arvados@example.com
-  workbench_address: https://localhost:3001/
+#
+# The order of precedence is:
+# 1. config/environments/{RAILS_ENV}.rb (deprecated)
+# 2. Section in application.yml corresponding to RAILS_ENV (e.g., development)
+# 3. Section in application.yml called "common"
+# 4. Section in application.default.yml corresponding to RAILS_ENV
+# 5. Section in application.default.yml called "common"
 
 common:
+  ###
+  ### Essential site configuration
+  ###
+
   # The prefix used for all database identifiers to identify the record as
   # originating from this site.  Must be exactly 5 alphanumeric characters
   # (lowercase ASCII letters and digits).
-  uuid_prefix: <%= Digest::MD5.hexdigest(`hostname`).to_i(16).to_s(36)[0..4] %>
+  uuid_prefix: ~
 
-  # If not false, this is the hostname that will be used for root_url and
-  # advertised in the discovery document.  By default, use the default Rails
-  # logic for deciding on a hostname.
-  host: false
+  # secret_token is a string of alphanumeric characters used by Rails
+  # to sign session tokens. IMPORTANT: This is a site secret. It
+  # should be at least 50 characters.
+  secret_token: ~
+
+  # blob_signing_key is a string of alphanumeric characters used to
+  # generate permission signatures for Keep locators. It must be
+  # identical to the permission key given to Keep. IMPORTANT: This is
+  # a site secret. It should be at least 50 characters.
+  blob_signing_key: ~
+
+  # These settings are provided by your OAuth2 provider (e.g.,
+  # sso-provider).
+  sso_app_secret: ~
+  sso_app_id: ~
+  sso_provider_url: ~
 
   # If this is not false, HTML requests at the API server's root URL
   # are redirected to this location, and it is provided in the text of
@@ -63,11 +41,35 @@ common:
   # to log in.
   workbench_address: false
 
+  # The ARVADOS_WEBSOCKETS environment variable determines whether to
+  # serve http, websockets, or both.
+  #
+  # If ARVADOS_WEBSOCKETS="true", http and websockets are both served
+  # from the same process.
+  #
+  # If ARVADOS_WEBSOCKETS="ws-only", only websockets is served.
+  #
+  # If ARVADOS_WEBSOCKETS="false" or not set at all, only http is
+  # served. In this case, you should have a separate process serving
+  # websockets, and the address of that service should be given here
+  # as websocket_address.
+  #
+  # If websocket_address is false (which is the default), the
+  # discovery document will tell clients to use the current server as
+  # the websocket service, or (if the current server does not have
+  # websockets enabled) not to use websockets at all.
+  #
+  # Example: Clients will connect to the specified endpoint.
+  #websocket_address: wss://127.0.0.1:3333/websocket
+  # Default: Clients will connect to this server if it's running
+  # websockets, otherwise none at all.
+  websocket_address: false
+
   # Git repositories must be readable by api server, or you won't be
   # able to submit crunch jobs. To pass the test suites, put a clone
   # of the arvados tree in {git_repositories_dir}/arvados.git or
   # {git_repositories_dir}/arvados/.git
-  git_repositories_dir: /var/lib/arvados/git
+  git_repositories_dir: /var/lib/arvados/git/repositories
 
   # This is a (bare) repository that stores commits used in jobs.  When a job
   # runs, the source commits are first fetched into this repository, then this
@@ -75,15 +77,128 @@ common:
   # subdirectory of {git_repositories_dir}.
   git_internal_dir: /var/lib/arvados/internal.git
 
-  # :none or :slurm_immediate
-  crunch_job_wrapper: :none
+  # Default replication level for collections. This is used when a
+  # collection's replication_desired attribute is nil.
+  default_collection_replication: 2
 
-  # username, or false = do not set uid when running jobs.
-  crunch_job_user: crunch
 
-  # The web service must be able to create/write this file, and
-  # crunch-job must be able to stat() it.
-  crunch_refresh_trigger: /tmp/crunch_refresh_trigger
+  ###
+  ### Overriding default advertised hostnames/URLs
+  ###
+
+  # If not false, this is the hostname that will be used for root_url and
+  # advertised in the discovery document.  By default, use the default Rails
+  # logic for deciding on a hostname.
+  host: false
+
+  # Base part of SSH git clone url given with repository resources. If
+  # true, the default "git@git.(uuid_prefix).arvadosapi.com:" is
+  # used. If false, SSH clone URLs are not advertised. Include a
+  # trailing ":" or "/" if needed: it will not be added automatically.
+  git_repo_ssh_base: true
+
+  # Base part of HTTPS git clone urls given with repository
+  # resources. This is expected to be an arv-git-httpd service which
+  # accepts API tokens as HTTP-auth passwords. If true, the default
+  # "https://git.(uuid_prefix).arvadosapi.com/" is used. If false,
+  # HTTPS clone URLs are not advertised. Include a trailing ":" or "/"
+  # if needed: it will not be added automatically.
+  git_repo_https_base: true
+
+
+  ###
+  ### New user and email settings
+  ###
+
+  # Config parameters to automatically setup new users.
+  # The params auto_setup_new_users_with_* are meaningful only when auto_setup_new_users is turned on.
+  # auto_setup_name_blacklist is a list of usernames to be blacklisted for auto setup.
+  auto_setup_new_users: false
+  auto_setup_new_users_with_vm_uuid: false
+  auto_setup_new_users_with_repository: false
+  auto_setup_name_blacklist: [arvados, git, gitolite, gitolite-admin, root, syslog]
+
+  # When new_users_are_active is set to true, the user agreement check is skipped.
+  new_users_are_active: false
+
+  # The e-mail address of the user you would like to become marked as an admin
+  # user on their first login.
+  # In the default configuration, authentication happens through the Arvados SSO
+  # server, which uses OAuth2 against Google's servers, so in that case this
+  # should be an address associated with a Google account.
+  auto_admin_user: false
+
+  # If auto_admin_first_user is set to true, the first user to log in when no
+  # other admin users exist will automatically become an admin user.
+  auto_admin_first_user: false
+
+  # Email address to notify whenever a user creates a profile for the
+  # first time
+  user_profile_notification_address: false
+
+  admin_notifier_email_from: arvados@example.com
+  email_subject_prefix: "[ARVADOS] "
+  user_notifier_email_from: arvados@example.com
+  new_user_notification_recipients: [ ]
+  new_inactive_user_notification_recipients: [ ]
+
+
+  ###
+  ### Limits, timeouts and durations
+  ###
+
+  # Lifetime (in seconds) of blob permission signatures generated by
+  # the API server. This determines how long a client can take (after
+  # retrieving a collection record) to retrieve the collection data
+  # from Keep. If the client needs more time than that (assuming the
+  # collection still has the same content and the relevant user/token
+  # still has permission) the client can retrieve the collection again
+  # to get fresh signatures.
+  #
+  # Datamanager considers an unreferenced block older than this to be
+  # eligible for garbage collection. Therefore, it should never be
+  # smaller than the corresponding value used by any local keepstore
+  # service (see keepstore -blob-signature-ttl flag). This rule
+  # prevents datamanager from trying to garbage-collect recently
+  # written blocks while clients are still holding valid signatures.
+  #
+  # The default is 2 weeks.
+  blob_signature_ttl: 1209600
+
+  # Default lifetime for ephemeral collections: 2 weeks.
+  default_trash_lifetime: 1209600
+
+  # Maximum size (in bytes) allowed for a single API request.  This
+  # limit is published in the discovery document for use by clients.
+  # Note: You must separately configure the upstream web server or
+  # proxy to actually enforce the desired maximum request size on the
+  # server side.
+  max_request_size: 134217728
+
+  # Limit the number of bytes read from the database during an index
+  # request (by retrieving and returning fewer rows than would
+  # normally be returned in a single response).
+  # Note 1: This setting never reduces the number of returned rows to
+  # zero, no matter how big the first data row is.
+  # Note 2: Currently, this only limits the
+  # arvados.v1.collections.list API (GET /arvados/v1/collections), and
+  # only takes the size of manifest_text into account. Other fields
+  # (e.g., "properties" hashes) are not counted against this limit
+  # when returning collections, and the limit is not applied at all
+  # for other data types.
+  max_index_database_read: 134217728
+
+  # When you run the db:delete_old_job_logs task, it will find jobs that
+  # have been finished for at least this many seconds, and delete their
+  # stderr logs from the logs table.
+  clean_job_log_rows_after: <%= 30.days %>
+
+  # The maximum number of compute nodes that can be in use simultaneously.
+  # If this limit is reduced, any existing nodes with slot number >= new limit
+  # will not be counted against the new limit. In other words, the new limit
+  # won't be strictly enforced until those nodes with higher slot numbers
+  # go down.
+  max_compute_nodes: 64
 
   # These two settings control how frequently log events are flushed to the
   # database.  Log lines are buffered until either crunch_log_bytes_per_event
@@ -107,56 +222,82 @@ common:
   # silenced by throttling are not counted against this total.
   crunch_limit_log_bytes_per_job: 67108864
 
-  # Path to dns server configuration directory (e.g. /etc/unbound.d/conf.d),
-  # or false = do not update dns server data.
+
+  ###
+  ### Crunch, DNS & compute node management
+  ###
+
+  # Docker image to be used when none found in runtime_constraints of a job
+  default_docker_image_for_jobs: false
+
+  # :none or :slurm_immediate
+  crunch_job_wrapper: :none
+
+  # username, or false = do not set uid when running jobs.
+  crunch_job_user: crunch
+
+  # The web service must be able to create/write this file, and
+  # crunch-job must be able to stat() it.
+  crunch_refresh_trigger: /tmp/crunch_refresh_trigger
+
+  # Path to dns server configuration directory
+  # (e.g. /etc/unbound.d/conf.d). If false, do not write any config
+  # files or touch restart.txt (see below).
   dns_server_conf_dir: false
 
-  # Template for the dns server host snippets. See unbound.template in this directory for
-  # an example. Set to false to disable.
+  # Template file for the dns server host snippets. See
+  # unbound.template in this directory for an example. If false, do
+  # not write any config files.
   dns_server_conf_template: false
 
-  # Dns server reload command, or false = do not reload dns server after data change
+  # String to write to {dns_server_conf_dir}/restart.txt (with a
+  # trailing newline) after updating local data. If false, do not
+  # open or write the restart.txt file.
   dns_server_reload_command: false
 
-  # Example for unbound
+  # Command to run after each DNS update. Template variables will be
+  # substituted; see the "unbound" example below. If false, do not run
+  # a command.
+  dns_server_update_command: false
+
+  ## Example for unbound:
   #dns_server_conf_dir: /etc/unbound/conf.d
   #dns_server_conf_template: /path/to/your/api/server/config/unbound.template
-  #dns_server_reload_command: /etc/init.d/unbound reload
+  ## ...plus one of the following two methods of reloading:
+  #dns_server_reload_command: unbound-control reload
+  #dns_server_update_command: echo %{hostname} %{hostname}.%{uuid_prefix} %{hostname}.%{uuid_prefix}.arvadosapi.com %{ptr_domain} | xargs -n 1 unbound-control local_data_remove && unbound-control local_data %{hostname} IN A %{ip_address} && unbound-control local_data %{hostname}.%{uuid_prefix} IN A %{ip_address} && unbound-control local_data %{hostname}.%{uuid_prefix}.arvadosapi.com IN A %{ip_address} && unbound-control local_data %{ptr_domain}. IN PTR %{hostname}.%{uuid_prefix}.arvadosapi.com
 
   compute_node_domain: false
   compute_node_nameservers:
     - 192.168.1.1
 
-  # The version below is suitable for AWS.
-  # To use it, copy it to your application.yml, uncomment, and change <%# to <%=
-  # compute_node_nameservers: <%#
-  #   require 'net/http'
-  #   ['local', 'public'].collect do |iface|
-  #     Net::HTTP.get(URI("http://169.254.169.254/latest/meta-data/#{iface}-ipv4")).match(/^[\d\.]+$/)[0]
-  #   end << '172.16.0.23'
-  # %>
-
-  accept_api_token: {}
+  # Hostname to assign to a compute node when it sends a "ping" and the
+  # hostname in its Node record is nil.
+  # During bootstrapping, the "ping" script is expected to notice the
+  # hostname given in the ping response, and update its unix hostname
+  # accordingly.
+  # If false, leave the hostname alone (this is appropriate if your compute
+  # nodes' hostnames are already assigned by some other mechanism).
+  #
+  # One way or another, the hostnames of your node records should agree
+  # with your DNS records and your /etc/slurm-llnl/slurm.conf files.
+  #
+  # Example for compute0000, compute0001, ....:
+  # assign_node_hostname: compute%<slot_number>04d
+  # (See http://ruby-doc.org/core-2.2.2/Kernel.html#method-i-format for more.)
+  assign_node_hostname: compute%<slot_number>d
 
-  # When new_users_are_active is set to true, the user agreement check is skipped.
-  new_users_are_active: false
 
-  admin_notifier_email_from: arvados@example.com
-  email_subject_prefix: "[ARVADOS] "
-  user_notifier_email_from: arvados@example.com
-  new_user_notification_recipients: [ ]
-  new_inactive_user_notification_recipients: [ ]
+  ###
+  ### Remaining assorted configuration options.
+  ###
 
-  # The e-mail address of the user you would like to become marked as an admin
-  # user on their first login.
-  # In the default configuration, authentication happens through the Arvados SSO
-  # server, which uses openid against Google's servers, so in that case this
-  # should be an address associated with a Google account.
-  auto_admin_user: false
+  arvados_theme: default
 
-  # If auto_admin_first_user is set to true, the first user to log in when no
-  # other admin users exist will automatically become an admin user.
-  auto_admin_first_user: false
+  # Permit insecure (OpenSSL::SSL::VERIFY_NONE) connections to the Single Sign
+  # On (sso) server.  Should only be enabled during development when the SSO
+  # server is using a self-signed cert.
+  sso_insecure: false
 
   ## Set Time.zone default to the specified zone and make Active
   ## Record auto-convert to this zone.  Run "rake -D time" for a list
@@ -172,42 +313,6 @@ common:
   # Version of your assets, change this if you want to expire all your assets
   assets.version: "1.0"
 
-  arvados_theme: default
-
-  # The ARVADOS_WEBSOCKETS environment variable determines whether to
-  # serve http, websockets, or both.
-  #
-  # If ARVADOS_WEBSOCKETS="true", http and websockets are both served
-  # from the same process.
-  #
-  # If ARVADOS_WEBSOCKETS="ws-only", only websockets is served.
-  #
-  # If ARVADOS_WEBSOCKETS="false" or not set at all, only http is
-  # served. In this case, you should have a separate process serving
-  # websockets, and the address of that service should be given here
-  # as websocket_address.
-  #
-  # If websocket_address is false (which is the default), the
-  # discovery document will tell clients to use the current server as
-  # the websocket service, or (if the current server does not have
-  # websockets enabled) not to use websockets at all.
-  #
-  # Example: Clients will connect to the specified endpoint.
-  #websocket_address: wss://127.0.0.1:3333/websocket
-  # Default: Clients will connect to this server if it's running
-  # websockets, otherwise none at all.
-  websocket_address: false
-
-  # blob_signing_key is a string of alphanumeric characters used to
-  # generate permission signatures for Keep locators. It must be
-  # identical to the permission key given to Keep. IMPORTANT: This is
-  # a site secret. It should be at least 50 characters.
-  blob_signing_key: ~
-
-  # Amount of time (in seconds) for which a blob permission signature
-  # remains valid.  Default: 2 weeks (1209600 seconds)
-  blob_signing_ttl: 1209600
-
   # Allow clients to create collections by providing a manifest with
   # unsigned data blob locators. IMPORTANT: This effectively disables
   # access controls for data stored in Keep: a client who knows a hash
@@ -219,42 +324,59 @@ common:
   # one another!
   permit_create_collection_with_unsigned_manifest: false
 
-  # secret_token is a string of alphanumeric characters used by Rails
-  # to sign session tokens. IMPORTANT: This is a site secret. It
-  # should be at least 50 characters.
-  secret_token: ~
-
-  # email address to which mail should be sent when the user creates profile for the first time
-  user_profile_notification_address: false
-
   default_openid_prefix: https://www.google.com/accounts/o8/id
 
-  # Config parameters to automatically setup new users.
-  # The params auto_setup_new_users_with_* are meaningful only when auto_setup_new_users is turned on.
-  # auto_setup_name_blacklist is a list of usernames to be blacklisted for auto setup.
-  auto_setup_new_users: false
-  auto_setup_new_users_with_vm_uuid: false
-  auto_setup_new_users_with_repository: false
-  auto_setup_name_blacklist: [arvados, git, gitolite, gitolite-admin, root, syslog]
-
   # source_version
   source_version: "<%= `git log -n 1 --format=%h`.strip %>"
   local_modified: false
 
-  # Default lifetime for ephemeral collections: 2 weeks.
-  default_trash_lifetime: 1209600
 
-  # Permit insecure (OpenSSL::SSL::VERIFY_NONE) connections to the Single Sign
-  # On (sso) server.  Should only be enabled during development when the SSO
-  # server is using a self-signed cert.
-  sso_insecure: false
+development:
+  force_ssl: false
+  cache_classes: false
+  whiny_nils: true
+  consider_all_requests_local: true
+  action_controller.perform_caching: false
+  action_mailer.raise_delivery_errors: false
+  action_mailer.perform_deliveries: false
+  active_support.deprecation: :log
+  action_dispatch.best_standards_support: :builtin
+  active_record.mass_assignment_sanitizer: :strict
+  active_record.auto_explain_threshold_in_seconds: 0.5
+  assets.compress: false
+  assets.debug: true
+  local_modified: "<%= '-modified' if `git status -s` != '' %>"
 
-  # Default replication level for collections. This is used when a
-  # collection's replication_desired attribute is nil.
-  default_collection_replication: 2
+production:
+  force_ssl: true
+  cache_classes: true
+  consider_all_requests_local: false
+  action_controller.perform_caching: true
+  serve_static_assets: false
+  assets.compress: true
+  assets.compile: false
+  assets.digest: true
 
-  # Maximum size (in bytes) allowed for a single API request that will be
-  # published in the discovery document for use by clients.
-  # Note you must separately configure the upstream web server or proxy to
-  # actually enforce the desired maximum request size on the server side.
-  max_request_size: 134217728
+test:
+  force_ssl: false
+  cache_classes: true
+  serve_static_assets: true
+  static_cache_control: public, max-age=3600
+  whiny_nils: true
+  consider_all_requests_local: true
+  action_controller.perform_caching: false
+  action_dispatch.show_exceptions: false
+  action_controller.allow_forgery_protection: false
+  action_mailer.delivery_method: :test
+  active_support.deprecation: :stderr
+  active_record.mass_assignment_sanitizer: :strict
+  uuid_prefix: zzzzz
+  sso_app_id: arvados-server
+  sso_app_secret: <%= rand(2**512).to_s(36) %>
+  sso_provider_url: http://localhost:3002
+  secret_token: <%= rand(2**512).to_s(36) %>
+  blob_signing_key: zfhgfenhffzltr9dixws36j1yhksjoll2grmku38mi7yxd66h5j4q9w4jzanezacp8s6q0ro3hxakfye02152hncy6zml2ed0uc
+  user_profile_notification_address: arvados@example.com
+  workbench_address: https://localhost:3001/
+  git_repositories_dir: <%= Rails.root.join 'tmp', 'git', 'test' %>
+  git_internal_dir: <%= Rails.root.join 'tmp', 'internal.git' %>
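
A worked example of the precedence order documented at the top of this file
(section contents hypothetical; each later merge overrides earlier keys, as
in config/initializers/load_config.rb):

    # application.default.yml "common", then application.yml "common",
    # then the application.yml section for the current RAILS_ENV:
    defaults = { 'max_compute_nodes' => 64, 'uuid_prefix' => nil }
    site     = { 'uuid_prefix' => 'zzzzz' }
    env      = { 'max_compute_nodes' => 4 }

    defaults.merge(site).merge(env)
    # => {"max_compute_nodes"=>4, "uuid_prefix"=>"zzzzz"}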
index c3e599feeb0b8980ecadfb54fb9791b7fe370e03..149770c9ac38c9e40b311fae2b429eebd09009d1 100644 (file)
 # 4. Section in application.default.yml corresponding to RAILS_ENV
 # 5. Section in application.default.yml called "common"
 
-development:
-  # Mandatory site secrets. See application.default.yml for more info.
+production:
+  # Mandatory site configuration.  See application.default.yml and
+  # http://doc.arvados.org/install/install-api-server.html#configure_application
+  # for more information.
+  uuid_prefix: ~
   secret_token: ~
   blob_signing_key: ~
-  uuid_prefix: bogus
-  workbench_address: https://localhost:3031
+  sso_app_secret: ~
+  sso_app_id: ~
+  sso_provider_url: ~
+  workbench_address: ~
+  websocket_address: ~
+  #git_repositories_dir: ~
+  #git_internal_dir: ~
 
-production:
-  # Mandatory site secrets. See application.default.yml for more info.
+development:
+  # Separate settings for development configuration.
+  uuid_prefix: ~
   secret_token: ~
   blob_signing_key: ~
-  uuid_prefix: bogus
-  workbench_address: https://workbench.bogus.arvadosapi.com
+  sso_app_id: ~
+  sso_app_secret: ~
+  sso_provider_url: ~
+  workbench_address: ~
+  websocket_address: ~
+  #git_repositories_dir: ~
+  #git_internal_dir: ~
 
 test:
   # Tests should be able to run without further configuration, but if you do
similarity index 86%
rename from services/api/config/database.yml.sample
rename to services/api/config/database.yml.example
index 6a2701622b0d3e1649ffe6c74a45ed36ce6b0a6e..f58cc11c95086c77257aed8331f2092786caddc5 100644 (file)
@@ -1,5 +1,6 @@
 development:
   adapter: postgresql
+  template: template0
   encoding: utf8
   database: arvados_development
   username: arvados
@@ -8,6 +9,7 @@ development:
 
 test:
   adapter: postgresql
+  template: template0
   encoding: utf8
   database: arvados_test
   username: arvados
@@ -16,6 +18,7 @@ test:
 
 production:
   adapter: postgresql
+  template: template0
   encoding: utf8
   database: arvados_production
   username: arvados
diff --git a/services/api/config/initializers/hardcoded_api_tokens.rb.example b/services/api/config/initializers/hardcoded_api_tokens.rb.example
deleted file mode 100644 (file)
index 6339bf6..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-Server::Application.configure do
-  config.accept_api_token = { 'foobar' => true }
-end
similarity index 56%
rename from services/api/config/initializers/zz_load_config.rb
rename to services/api/config/initializers/load_config.rb
index 3399fd9bf59fb9751303830cbb87aee89a502e81..de9770d7b7d1b45d7466b87b93f1e2fee9e9afab 100644 (file)
@@ -1,3 +1,27 @@
+begin
+  # If secret_token.rb exists here, we need to load it first.
+  require_relative 'secret_token.rb'
+rescue LoadError
+  # Normally secret_token.rb is missing and the secret token is
+  # configured by application.yml (i.e., here!) instead.
+end
+
+if (File.exists?(File.expand_path '../omniauth.rb', __FILE__) and
+    not defined? WARNED_OMNIAUTH_CONFIG)
+  Rails.logger.warn <<-EOS
+DEPRECATED CONFIGURATION:
+ Please move your SSO provider config into config/application.yml
+ and delete config/initializers/omniauth.rb.
+EOS
+  # Real values will be copied from globals by omniauth_init.rb. For
+  # now, assign some strings so the generic *.yml config loader
+  # doesn't overwrite them or complain that they're missing.
+  Rails.configuration.sso_app_id = 'xxx'
+  Rails.configuration.sso_app_secret = 'xxx'
+  Rails.configuration.sso_provider_url = '//xxx'
+  WARNED_OMNIAUTH_CONFIG = true
+end
+
 $application_config = {}
 
 %w(application.default application).each do |cfgfile|
@@ -5,6 +29,8 @@ $application_config = {}
   if File.exists? path
     yaml = ERB.new(IO.read path).result(binding)
     confs = YAML.load(yaml)
+    # Ignore empty YAML file:
+    next if confs == false
     $application_config.merge!(confs['common'] || {})
     $application_config.merge!(confs[::Rails.env.to_s] || {})
   end
diff --git a/services/api/config/initializers/omniauth.rb.example b/services/api/config/initializers/omniauth.rb.example
deleted file mode 100644 (file)
index aefcf56..0000000
+++ /dev/null
@@ -1,13 +0,0 @@
-# Change this omniauth configuration to point to your registered provider
-# Since this is a registered application, add the app id and secret here
-APP_ID = 'arvados-server'
-APP_SECRET = rand(2**512).to_s(36) # CHANGE ME!
-
-# Update your custom Omniauth provider URL here
-CUSTOM_PROVIDER_URL = 'http://localhost:3002'
-
-Rails.application.config.middleware.use OmniAuth::Builder do
-  provider :josh_id, APP_ID, APP_SECRET, CUSTOM_PROVIDER_URL
-end
-
-OmniAuth.config.on_failure = StaticController.action(:login_failure)
diff --git a/services/api/config/initializers/omniauth_init.rb b/services/api/config/initializers/omniauth_init.rb
new file mode 100644 (file)
index 0000000..3277c2b
--- /dev/null
@@ -0,0 +1,19 @@
+# This file is called omniauth_init.rb instead of omniauth.rb because
+# older versions had site configuration in omniauth.rb.
+#
+# It must come after omniauth.rb in (lexical) load order.
+
+if defined? CUSTOM_PROVIDER_URL
+  Rails.logger.warn "Copying omniauth from globals in legacy config file."
+  Rails.configuration.sso_app_id = APP_ID
+  Rails.configuration.sso_app_secret = APP_SECRET
+  Rails.configuration.sso_provider_url = CUSTOM_PROVIDER_URL
+else
+  Rails.application.config.middleware.use OmniAuth::Builder do
+    provider(:josh_id,
+             Rails.configuration.sso_app_id,
+             Rails.configuration.sso_app_secret,
+             Rails.configuration.sso_provider_url)
+  end
+  OmniAuth.config.on_failure = StaticController.action(:login_failure)
+end
similarity index 55%
rename from services/api/config/initializers/zz_preload_all_models.rb
rename to services/api/config/initializers/preload_all_models.rb
index 1a76b72ed320f8ee9e2f261f1b2efb9b74eb9920..7e2612377434b9e3bfc245a8b4dc6143d6ad00c6 100644 (file)
@@ -1,7 +1,12 @@
 # See http://aaronvb.com/articles/37-rails-caching-and-undefined-class-module
 
+# Config must be done before we load model class files; otherwise they
+# won't be able to use Rails.configuration.* to initialize their
+# classes.
+require_relative 'load_config.rb'
+
 if Rails.env == 'development'
   Dir.foreach("#{Rails.root}/app/models") do |model_file|
     require_dependency model_file if model_file.match /\.rb$/
-  end 
+  end
 end
diff --git a/services/api/db/migrate/20150317132720_add_username_to_users.rb b/services/api/db/migrate/20150317132720_add_username_to_users.rb
new file mode 100644 (file)
index 0000000..de2fc96
--- /dev/null
@@ -0,0 +1,127 @@
+require 'has_uuid'
+require 'kind_and_etag'
+
+class AddUsernameToUsers < ActiveRecord::Migration
+  include CurrentApiClient
+
+  SEARCH_INDEX_COLUMNS =
+    ["uuid", "owner_uuid", "modified_by_client_uuid",
+     "modified_by_user_uuid", "email", "first_name", "last_name",
+     "identity_url", "default_owner_uuid"]
+
+  class ArvadosModel < ActiveRecord::Base
+    self.abstract_class = true
+    extend HasUuid::ClassMethods
+    include CurrentApiClient
+    include KindAndEtag
+    before_create do |record|
+      record.uuid ||= record.class.generate_uuid
+      record.owner_uuid ||= system_user_uuid
+    end
+    serialize :properties, Hash
+
+    def self.to_s
+      # Clean up the name of the stub model class so we generate correct UUIDs.
+      super.rpartition("::").last
+    end
+  end
+
+  class Log < ArvadosModel
+    def self.log_for(thing, age="old")
+      { "#{age}_etag" => thing.etag,
+        "#{age}_attributes" => thing.attributes,
+      }
+    end
+
+    def self.log_create(thing)
+      new_log("create", thing, log_for(thing, "new"))
+    end
+
+    def self.log_update(thing, start_state)
+      new_log("update", thing, start_state.merge(log_for(thing, "new")))
+    end
+
+    def self.log_destroy(thing)
+      new_log("destroy", thing, log_for(thing, "old"))
+    end
+
+    private
+
+    def self.new_log(event_type, thing, properties)
+      create!(event_type: event_type,
+              event_at: Time.now,
+              object_uuid: thing.uuid,
+              object_owner_uuid: thing.owner_uuid,
+              properties: properties)
+    end
+  end
+
+  class Link < ArvadosModel
+  end
+
+  class User < ArvadosModel
+  end
+
+  def sanitize_username(username)
+    username.
+      sub(/^[^A-Za-z]+/, "").
+      gsub(/[^A-Za-z0-9]/, "")
+  end
+
+  def usernames_wishlist(user)
+    usernames = Hash.new(0)
+    usernames[user.email.split("@", 2).first] += 1
+    Link.
+       where(tail_uuid: user.uuid, link_class: "permission", name: "can_login").
+       find_each do |login_perm|
+      username = login_perm.properties["username"]
+      usernames[username] += 2 if (username and not username.empty?)
+    end
+    usernames.keys.
+      sort_by { |n| -usernames[n] }.
+      map { |n| sanitize_username(n) }.
+      reject(&:empty?)
+  end
+
+  def increment_username(username)
+    @username_suffixes[username] += 1
+    "%s%i" % [username, @username_suffixes[username]]
+  end
+
+  def each_wanted_username(user)
+    usernames = usernames_wishlist(user)
+    usernames.each { |n| yield n }
+    base_username = usernames.first || "arvadosuser"
+    loop { yield increment_username(base_username) }
+  end
+
+  def recreate_search_index(columns)
+    remove_index :users, name: "users_search_index"
+    add_index :users, columns, name: "users_search_index"
+  end
+
+  def up
+    @username_suffixes = Hash.new(1)
+    add_column :users, :username, :string, null: true
+    add_index :users, :username, unique: true
+    recreate_search_index(SEARCH_INDEX_COLUMNS + ["username"])
+
+    [Link, Log, User].each { |m| m.reset_column_information }
+    User.validates(:username, uniqueness: true, allow_nil: true)
+    User.where(is_active: true).order(created_at: :asc).find_each do |user|
+      start_log = Log.log_for(user)
+      each_wanted_username(user) do |username|
+        user.username = username
+        break if user.valid?
+      end
+      user.save!
+      Log.log_update(user, start_log)
+    end
+  end
+
+  def down
+    remove_index :users, :username
+    recreate_search_index(SEARCH_INDEX_COLUMNS)
+    remove_column :users, :username
+  end
+end
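Note: the username assignment above prefers usernames recorded on can_login permission links (weighted higher) over the email local-part, sanitizes each candidate, and falls back to numbered suffixes until a unique value validates. The sanitizer can be exercised standalone (the sample inputs are made up):

    def sanitize_username(username)
      username.
        sub(/^[^A-Za-z]+/, "").   # drop leading non-letters
        gsub(/[^A-Za-z0-9]/, "")  # drop remaining non-alphanumerics
    end

    p sanitize_username("2fast.user")  # => "fastuser"
    p sanitize_username("jane_doe")    # => "janedoe"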
diff --git a/services/api/db/migrate/20150324152204_backward_compatibility_for_user_repositories.rb b/services/api/db/migrate/20150324152204_backward_compatibility_for_user_repositories.rb
new file mode 100644 (file)
index 0000000..12b888d
--- /dev/null
@@ -0,0 +1,89 @@
+require 'has_uuid'
+require 'kind_and_etag'
+
+class BackwardCompatibilityForUserRepositories < ActiveRecord::Migration
+  include CurrentApiClient
+
+  class ArvadosModel < ActiveRecord::Base
+    self.abstract_class = true
+    extend HasUuid::ClassMethods
+    include CurrentApiClient
+    include KindAndEtag
+    before_create do |record|
+      record.uuid ||= record.class.generate_uuid
+      record.owner_uuid ||= system_user_uuid
+    end
+    serialize :properties, Hash
+
+    def self.to_s
+      # Clean up the name of the stub model class so we generate correct UUIDs.
+      super.rpartition("::").last
+    end
+  end
+
+  class Log < ArvadosModel
+    def self.log_for(thing, age="old")
+      { "#{age}_etag" => thing.etag,
+        "#{age}_attributes" => thing.attributes,
+      }
+    end
+
+    def self.log_create(thing)
+      new_log("create", thing, log_for(thing, "new"))
+    end
+
+    def self.log_update(thing, start_state)
+      new_log("update", thing, start_state.merge(log_for(thing, "new")))
+    end
+
+    def self.log_destroy(thing)
+      new_log("destroy", thing, log_for(thing, "old"))
+    end
+
+    private
+
+    def self.new_log(event_type, thing, properties)
+      create!(event_type: event_type,
+              event_at: Time.now,
+              object_uuid: thing.uuid,
+              object_owner_uuid: thing.owner_uuid,
+              properties: properties)
+    end
+  end
+
+  class Link < ArvadosModel
+  end
+
+  class Repository < ArvadosModel
+  end
+
+  def up
+    remove_index :repositories, name: "repositories_search_index"
+    add_index(:repositories, %w(uuid owner_uuid modified_by_client_uuid
+                                modified_by_user_uuid name),
+              name: "repositories_search_index")
+    remove_column :repositories, :fetch_url
+    remove_column :repositories, :push_url
+
+    [Link, Log, Repository].each { |m| m.reset_column_information }
+    Repository.where("owner_uuid != ?", system_user_uuid).find_each do |repo|
+      link_attrs = {
+        tail_uuid: repo.owner_uuid,
+        link_class: "permission", name: "can_manage", head_uuid: repo.uuid,
+      }
+      if Link.where(link_attrs).first.nil?
+        manage_link = Link.create!(link_attrs)
+        Log.log_create(manage_link)
+      end
+      start_log = Log.log_for(repo)
+      repo.owner_uuid = system_user_uuid
+      repo.save!
+      Log.log_update(repo, start_log)
+    end
+  end
+
+  def down
+    raise ActiveRecord::IrreversibleMigration.
+      new("can't restore prior fetch and push URLs")
+  end
+end
diff --git a/services/api/db/migrate/20150423145759_no_filenames_in_collection_search_index.rb b/services/api/db/migrate/20150423145759_no_filenames_in_collection_search_index.rb
new file mode 100644 (file)
index 0000000..e8423c1
--- /dev/null
@@ -0,0 +1,11 @@
+class NoFilenamesInCollectionSearchIndex < ActiveRecord::Migration
+  def up
+    remove_index :collections, :name => 'collections_search_index'
+    add_index :collections, ["owner_uuid", "modified_by_client_uuid", "modified_by_user_uuid", "portable_data_hash", "uuid", "name"], name: 'collections_search_index'
+  end
+
+  def down
+    remove_index :collections, :name => 'collections_search_index'
+    add_index :collections, ["owner_uuid", "modified_by_client_uuid", "modified_by_user_uuid", "portable_data_hash", "uuid", "name", "file_names"], name: 'collections_search_index'
+  end
+end
diff --git a/services/api/db/migrate/20150512193020_read_only_on_keep_services.rb b/services/api/db/migrate/20150512193020_read_only_on_keep_services.rb
new file mode 100644 (file)
index 0000000..f86e471
--- /dev/null
@@ -0,0 +1,5 @@
+class ReadOnlyOnKeepServices < ActiveRecord::Migration
+  def change
+    add_column :keep_services, :read_only, :boolean, null: false, default: false
+  end
+end
diff --git a/services/api/db/migrate/20150526180251_leading_space_on_full_text_index.rb b/services/api/db/migrate/20150526180251_leading_space_on_full_text_index.rb
new file mode 100644 (file)
index 0000000..9433382
--- /dev/null
@@ -0,0 +1,41 @@
+require "./db/migrate/20150123142953_full_text_search.rb"
+
+class LeadingSpaceOnFullTextIndex < ActiveRecord::Migration
+  def up
+    # Inspect one of the full-text indexes (chosen arbitrarily) to
+    # determine whether this migration is needed.
+    ft_index_name = 'jobs_full_text_search_idx'
+    ActiveRecord::Base.connection.indexes('jobs').each do |idx|
+      if idx.name == ft_index_name
+        if idx.columns.first.index "((((' '"
+          # Index is already correct. This happens if the source tree
+          # already had the new version of full_text_tsvector by the
+          # time the initial FullTextSearch migration ran.
+          $stderr.puts "This migration is not needed."
+        else
+          # Index was created using the old full_text_tsvector. Drop
+          # and re-create all full text indexes.
+          FullTextSearch.new.migrate(:down)
+          FullTextSearch.new.migrate(:up)
+        end
+        return
+      end
+    end
+    raise "Did not find index '#{ft_index_name}'. Earlier migration missed??"
+  end
+
+  def down
+    $stderr.puts <<EOS
+Down-migration is not supported for this change, and might be unnecessary.
+
+If you run a code base older than 20150526180251 against this
+database, full text search will be slow even on collections where it
+used to work well. If this is a concern, first check out the desired
+older version of the code base, and then run
+"rake db:migrate:down VERSION=20150123142953"
+followed by
+"rake db:migrate:up VERSION=20150123142953"
+.
+EOS
+  end
+end
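Note: the "((((' '" check above detects the rebuilt tsvector expressions, which (as the schema diff below shows) now start with ' '::text. Presumably the leading space lets every column contribute a uniform "|| ' ' || value" term, so the first column is delimited and tokenized like all the others. A minimal sketch of the expression change (the column list is illustrative):

    columns = %w(uuid owner_uuid name)
    old_expr = columns.map { |c| "COALESCE(#{c}, '')" }.join(" || ' ' || ")
    new_expr = "' ' || " + old_expr
    puts new_expr
    # => ' ' || COALESCE(uuid, '') || ' ' || COALESCE(owner_uuid, '') || ' ' || COALESCE(name, '')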
index 0711f9025190bbf2a125d52e343f39b07a984d9d..01bb4172f1963cb12ddc557e2904eb021237a10d 100644 (file)
@@ -19,7 +19,7 @@ CREATE EXTENSION IF NOT EXISTS plpgsql WITH SCHEMA pg_catalog;
 -- Name: EXTENSION plpgsql; Type: COMMENT; Schema: -; Owner: -
 --
 
-COMMENT ON EXTENSION plpgsql IS 'PL/pgSQL procedural language';
+-- COMMENT ON EXTENSION plpgsql IS 'PL/pgSQL procedural language';
 
 
 SET search_path = public, pg_catalog;
@@ -518,7 +518,8 @@ CREATE TABLE keep_services (
     service_ssl_flag boolean,
     service_type character varying(255),
     created_at timestamp without time zone NOT NULL,
-    updated_at timestamp without time zone NOT NULL
+    updated_at timestamp without time zone NOT NULL,
+    read_only boolean DEFAULT false NOT NULL
 );
 
 
@@ -760,8 +761,6 @@ CREATE TABLE repositories (
     modified_by_user_uuid character varying(255),
     modified_at timestamp without time zone,
     name character varying(255),
-    fetch_url character varying(255),
-    push_url character varying(255),
     created_at timestamp without time zone NOT NULL,
     updated_at timestamp without time zone NOT NULL
 );
@@ -889,7 +888,8 @@ CREATE TABLE users (
     prefs text,
     updated_at timestamp without time zone NOT NULL,
     default_owner_uuid character varying(255),
-    is_active boolean DEFAULT false
+    is_active boolean DEFAULT false,
+    username character varying(255)
 );
 
 
@@ -1310,21 +1310,21 @@ CREATE UNIQUE INDEX collection_owner_uuid_name_unique ON collections USING btree
 -- Name: collections_full_text_search_idx; Type: INDEX; Schema: public; Owner: -; Tablespace: 
 --
 
-CREATE INDEX collections_full_text_search_idx ON collections USING gin (to_tsvector('english'::regconfig, (((((((((((((((((COALESCE(owner_uuid, ''::character varying))::text || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(portable_data_hash, ''::character varying))::text) || ' '::text) || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || COALESCE(properties, ''::text)) || ' '::text) || (COALESCE(file_names, ''::character varying))::text)));
+CREATE INDEX collections_full_text_search_idx ON collections USING gin (to_tsvector('english'::regconfig, (((((((((((((((((' '::text || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(portable_data_hash, ''::character varying))::text) || ' '::text) || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || COALESCE(properties, ''::text)) || ' '::text) || (COALESCE(file_names, ''::character varying))::text)));
 
 
 --
 -- Name: collections_search_index; Type: INDEX; Schema: public; Owner: -; Tablespace: 
 --
 
-CREATE INDEX collections_search_index ON collections USING btree (owner_uuid, modified_by_client_uuid, modified_by_user_uuid, portable_data_hash, uuid, name, file_names);
+CREATE INDEX collections_search_index ON collections USING btree (owner_uuid, modified_by_client_uuid, modified_by_user_uuid, portable_data_hash, uuid, name);
 
 
 --
 -- Name: groups_full_text_search_idx; Type: INDEX; Schema: public; Owner: -; Tablespace: 
 --
 
-CREATE INDEX groups_full_text_search_idx ON groups USING gin (to_tsvector('english'::regconfig, (((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(group_class, ''::character varying))::text)));
+CREATE INDEX groups_full_text_search_idx ON groups USING gin (to_tsvector('english'::regconfig, (((((((((((((' '::text || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(group_class, ''::character varying))::text)));
 
 
 --
@@ -1964,6 +1964,13 @@ CREATE INDEX index_users_on_modified_at ON users USING btree (modified_at);
 CREATE INDEX index_users_on_owner_uuid ON users USING btree (owner_uuid);
 
 
+--
+-- Name: index_users_on_username; Type: INDEX; Schema: public; Owner: -; Tablespace: 
+--
+
+CREATE UNIQUE INDEX index_users_on_username ON users USING btree (username);
+
+
 --
 -- Name: index_users_on_uuid; Type: INDEX; Schema: public; Owner: -; Tablespace: 
 --
@@ -2003,7 +2010,7 @@ CREATE INDEX job_tasks_search_index ON job_tasks USING btree (uuid, owner_uuid,
 -- Name: jobs_full_text_search_idx; Type: INDEX; Schema: public; Owner: -; Tablespace: 
 --
 
-CREATE INDEX jobs_full_text_search_idx ON jobs USING gin (to_tsvector('english'::regconfig, (((((((((((((((((((((((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(submit_id, ''::character varying))::text) || ' '::text) || (COALESCE(script, ''::character varying))::text) || ' '::text) || (COALESCE(script_version, ''::character varying))::text) || ' '::text) || COALESCE(script_parameters, ''::text)) || ' '::text) || (COALESCE(cancelled_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(cancelled_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output, ''::character varying))::text) || ' '::text) || (COALESCE(is_locked_by_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log, ''::character varying))::text) || ' '::text) || COALESCE(tasks_summary, ''::text)) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(repository, ''::character varying))::text) || ' '::text) || (COALESCE(supplied_script_version, ''::character varying))::text) || ' '::text) || (COALESCE(docker_image_locator, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(arvados_sdk_version, ''::character varying))::text)));
+CREATE INDEX jobs_full_text_search_idx ON jobs USING gin (to_tsvector('english'::regconfig, (((((((((((((((((((((((((((((((((((((((((' '::text || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(submit_id, ''::character varying))::text) || ' '::text) || (COALESCE(script, ''::character varying))::text) || ' '::text) || (COALESCE(script_version, ''::character varying))::text) || ' '::text) || COALESCE(script_parameters, ''::text)) || ' '::text) || (COALESCE(cancelled_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(cancelled_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(output, ''::character varying))::text) || ' '::text) || (COALESCE(is_locked_by_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(log, ''::character varying))::text) || ' '::text) || COALESCE(tasks_summary, ''::text)) || ' '::text) || COALESCE(runtime_constraints, ''::text)) || ' '::text) || (COALESCE(repository, ''::character varying))::text) || ' '::text) || (COALESCE(supplied_script_version, ''::character varying))::text) || ' '::text) || (COALESCE(docker_image_locator, ''::character varying))::text) || ' '::text) || (COALESCE(description, ''::character varying))::text) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || (COALESCE(arvados_sdk_version, ''::character varying))::text)));
 
 
 --
@@ -2059,7 +2066,7 @@ CREATE INDEX nodes_search_index ON nodes USING btree (uuid, owner_uuid, modified
 -- Name: pipeline_instances_full_text_search_idx; Type: INDEX; Schema: public; Owner: -; Tablespace: 
 --
 
-CREATE INDEX pipeline_instances_full_text_search_idx ON pipeline_instances USING gin (to_tsvector('english'::regconfig, (((((((((((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(pipeline_template_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || COALESCE(properties, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || COALESCE(components_summary, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text)));
+CREATE INDEX pipeline_instances_full_text_search_idx ON pipeline_instances USING gin (to_tsvector('english'::regconfig, (((((((((((((((((((((' '::text || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(pipeline_template_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || COALESCE(properties, ''::text)) || ' '::text) || (COALESCE(state, ''::character varying))::text) || ' '::text) || COALESCE(components_summary, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text)));
 
 
 --
@@ -2080,7 +2087,7 @@ CREATE UNIQUE INDEX pipeline_template_owner_uuid_name_unique ON pipeline_templat
 -- Name: pipeline_templates_full_text_search_idx; Type: INDEX; Schema: public; Owner: -; Tablespace: 
 --
 
-CREATE INDEX pipeline_templates_full_text_search_idx ON pipeline_templates USING gin (to_tsvector('english'::regconfig, (((((((((((((COALESCE(uuid, ''::character varying))::text || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text)));
+CREATE INDEX pipeline_templates_full_text_search_idx ON pipeline_templates USING gin (to_tsvector('english'::regconfig, (((((((((((((' '::text || (COALESCE(uuid, ''::character varying))::text) || ' '::text) || (COALESCE(owner_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_client_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(modified_by_user_uuid, ''::character varying))::text) || ' '::text) || (COALESCE(name, ''::character varying))::text) || ' '::text) || COALESCE(components, ''::text)) || ' '::text) || (COALESCE(description, ''::character varying))::text)));
 
 
 --
@@ -2094,7 +2101,7 @@ CREATE INDEX pipeline_templates_search_index ON pipeline_templates USING btree (
 -- Name: repositories_search_index; Type: INDEX; Schema: public; Owner: -; Tablespace: 
 --
 
-CREATE INDEX repositories_search_index ON repositories USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, name, fetch_url, push_url);
+CREATE INDEX repositories_search_index ON repositories USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, name);
 
 
 --
@@ -2122,7 +2129,7 @@ CREATE UNIQUE INDEX unique_schema_migrations ON schema_migrations USING btree (v
 -- Name: users_search_index; Type: INDEX; Schema: public; Owner: -; Tablespace: 
 --
 
-CREATE INDEX users_search_index ON users USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, email, first_name, last_name, identity_url, default_owner_uuid);
+CREATE INDEX users_search_index ON users USING btree (uuid, owner_uuid, modified_by_client_uuid, modified_by_user_uuid, email, first_name, last_name, identity_url, default_owner_uuid, username);
 
 
 --
@@ -2364,4 +2371,14 @@ INSERT INTO schema_migrations (version) VALUES ('20150216193428');
 
 INSERT INTO schema_migrations (version) VALUES ('20150303210106');
 
-INSERT INTO schema_migrations (version) VALUES ('20150312151136');
\ No newline at end of file
+INSERT INTO schema_migrations (version) VALUES ('20150312151136');
+
+INSERT INTO schema_migrations (version) VALUES ('20150317132720');
+
+INSERT INTO schema_migrations (version) VALUES ('20150324152204');
+
+INSERT INTO schema_migrations (version) VALUES ('20150423145759');
+
+INSERT INTO schema_migrations (version) VALUES ('20150512193020');
+
+INSERT INTO schema_migrations (version) VALUES ('20150526180251');
\ No newline at end of file
index 35671d65b287e495a76b2fc94b47cdf588983350..ac53876122d6b2e74b0d9fed85a56308308465b4 100644 (file)
@@ -116,7 +116,7 @@ class EventBus
 
         # Execute query and actually send the matching log rows
         count = 0
-        limit = 20
+        limit = 10
 
         logs.limit(limit).each do |l|
           ws.send(l.as_api_response.to_json)
@@ -128,7 +128,7 @@ class EventBus
           # Number of rows returned was capped by limit(), we need to schedule
           # another query to get more logs (will start from last_log_id
           # reported by current query)
-          EventMachine::schedule do
+          EventMachine::next_tick do
             push_events ws, nil
           end
         elsif !notify_id.nil? and (ws.last_log_id.nil? or notify_id > ws.last_log_id)
@@ -140,10 +140,15 @@ class EventBus
         # No filters set up, so just record the sequence number
         ws.last_log_id = notify_id
       end
+    rescue ArgumentError => e
+      # There was some kind of user error.
+      Rails.logger.warn "Error publishing event: #{$!}"
+      ws.send ({status: 500, message: $!}.to_json)
+      ws.close
     rescue => e
       Rails.logger.warn "Error publishing event: #{$!}"
       Rails.logger.warn "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
-      ws.send ({status: 500, message: 'error'}.to_json)
+      ws.send ({status: 500, message: $!}.to_json)
       ws.close
       # These exceptions typically indicate serious server trouble:
       # out of memory issues, database connection problems, etc.  Go ahead and
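Note: switching from EventMachine::schedule to EventMachine::next_tick defers the follow-up query to the reactor's next loop iteration (schedule runs the block immediately when already on the reactor thread), so a subscriber with a deep backlog yields between batches. A standalone sketch of the pattern, assuming the eventmachine gem is installed (the data is fake):

    require 'eventmachine'

    LIMIT = 10
    items = (1..25).to_a

    def push_batch(items)
      batch = items.shift(LIMIT)
      puts "sent #{batch.size} rows"
      if batch.size == LIMIT
        EventMachine.next_tick { push_batch(items) }  # more to send: requeue
      else
        EventMachine.stop
      end
    end

    EventMachine.run { push_batch(items) }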
diff --git a/services/api/lib/salvage_collection.rb b/services/api/lib/salvage_collection.rb
new file mode 100755 (executable)
index 0000000..2011f81
--- /dev/null
@@ -0,0 +1,94 @@
+module SalvageCollection
+  # Take two input parameters: a collection uuid and reason
+  # Get "src_collection" with the given uuid
+  # Create a new collection with:
+  #   src_collection.manifest_text as "invalid_manifest_text.txt"
+  #   Locators from src_collection.manifest_text as "salvaged_data"
+  # Update src_collection:
+  #   Set src_collection.manifest_text to: ""
+  #   Append to src_collection.name: " (reason; salvaged data at new_collection.uuid)"
+  #   Set portable_data_hash to "d41d8cd98f00b204e9800998ecf8427e+0"
+
+  require File.dirname(__FILE__) + '/../config/environment'
+  include ApplicationHelper
+  require 'tempfile'
+  require 'shellwords'
+
+  def salvage_collection_arv_put cmd
+    new_manifest = %x(#{cmd})
+    if $?.success?
+      new_manifest
+    else
+      raise "Error during arv-put: #{$?} (cmd was #{cmd.inspect})"
+    end
+  end
+
+  # Get all the locators (and perhaps other strings that look a lot
+  # like locators) from the original manifest, even if they don't
+  # appear in the correct positions with the correct space delimiters.
+  def salvage_collection_locator_data manifest
+    locators = []
+    size = 0
+    manifest.scan /(^|[^[:xdigit:]])([[:xdigit:]]{32})((\+\d+)(\+|\b))?/ do |_, hash, _, sizehint, _|
+      if sizehint
+        locators << hash.downcase + sizehint
+        size += sizehint.to_i
+      else
+        locators << hash.downcase
+      end
+    end
+    locators << 'd41d8cd98f00b204e9800998ecf8427e+0' if !locators.any?
+    return [locators, size]
+  end
+
+  def salvage_collection uuid, reason='salvaged - see #6277, #6859'
+    act_as_system_user do
+      if !ENV['ARVADOS_API_TOKEN'].present? or !ENV['ARVADOS_API_HOST'].present?
+        raise "ARVADOS environment variables missing. Please set your admin user credentials as ARVADOS environment variables."
+      end
+
+      if !uuid.present?
+        raise "Collection UUID is required."
+      end
+
+      src_collection = Collection.find_by_uuid uuid
+      if !src_collection
+        raise "No collection found for #{uuid}."
+      end
+
+      src_manifest = src_collection.manifest_text || ''
+
+      # create new collection using 'arv-put' with original manifest_text as the data
+      temp_file = Tempfile.new('temp')
+      temp_file.write(src_manifest)
+
+      temp_file.close
+
+      new_manifest = salvage_collection_arv_put "arv-put --as-stream --use-filename invalid_manifest_text.txt #{Shellwords::shellescape(temp_file.path)}"
+
+      temp_file.unlink
+
+      # Get the locator data in the format [[locators], size] from the original manifest
+      locator_data = salvage_collection_locator_data src_manifest
+
+      new_manifest += (". #{locator_data[0].join(' ')} 0:#{locator_data[1]}:salvaged_data\n")
+
+      new_collection = Collection.new
+      new_collection.name = "salvaged from #{src_collection.uuid}, #{src_collection.portable_data_hash}"
+      new_collection.manifest_text = new_manifest
+
+      created = new_collection.save!
+      raise "New collection creation failed." if !created
+
+      $stderr.puts "Salvaged manifest and data for #{uuid} are in #{new_collection.uuid}."
+      puts "Created new collection #{new_collection.uuid}"
+
+      # update src_collection collection name, pdh, and manifest_text
+      src_collection.name = (src_collection.name || '') + ' (' + (reason || '') + '; salvaged data at ' + new_collection.uuid + ')'
+      src_collection.manifest_text = ''
+      src_collection.portable_data_hash = 'd41d8cd98f00b204e9800998ecf8427e+0'
+      src_collection.save!
+      $stderr.puts "Collection #{uuid} emptied and renamed to #{src_collection.name.inspect}."
+    end
+  end
+end
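Note: the locator scan above is deliberately permissive so it can pull 32-hex-digit block hashes (with or without size hints) out of a mangled manifest. A standalone sketch of the same scan (the sample manifest is made up):

    manifest = "junk acbd18db4cc2f85cedef654fccc4a4d8+3 text 37b51d194a7513e45b56f6524f2d51f2"
    locators = []
    size = 0
    manifest.scan(/(^|[^[:xdigit:]])([[:xdigit:]]{32})((\+\d+)(\+|\b))?/) do |_, hash, _, sizehint, _|
      if sizehint
        locators << hash.downcase + sizehint  # keep the size hint with the hash
        size += sizehint.to_i
      else
        locators << hash.downcase
      end
    end
    p locators  # => ["acbd18db4cc2f85cedef654fccc4a4d8+3", "37b51d194a7513e45b56f6524f2d51f2"]
    p size      # => 3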
diff --git a/services/api/lib/tasks/delete_old_job_logs.rake b/services/api/lib/tasks/delete_old_job_logs.rake
new file mode 100644 (file)
index 0000000..7f2b31e
--- /dev/null
@@ -0,0 +1,17 @@
+# This task finds jobs that have been finished for at least as long as
+# the duration specified in the `clean_job_log_rows_after`
+# configuration setting, and deletes their stderr logs from the logs table.
+
+namespace :db do
+  desc "Remove old job stderr entries from the logs table"
+  task delete_old_job_logs: :environment do
+    Log.select("logs.id").
+        joins("JOIN jobs ON object_uuid = jobs.uuid").
+        where("event_type = :etype AND jobs.log IS NOT NULL AND jobs.finished_at < :age",
+              etype: "stderr",
+              age: Rails.configuration.clean_job_log_rows_after.ago).
+        find_in_batches do |old_log_ids|
+      Log.where(id: old_log_ids.map(&:id)).delete_all
+    end
+  end
+end
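Note: operators would typically run this as something like `RAILS_ENV=production bundle exec rake db:delete_old_job_logs` from the API server directory; the exact invocation (cron wrapper, bundler setup) depends on the deployment.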
diff --git a/services/api/lib/tasks/test_tasks.rake b/services/api/lib/tasks/test_tasks.rake
new file mode 100644 (file)
index 0000000..27bf232
--- /dev/null
@@ -0,0 +1,6 @@
+namespace :test do
+  new_task = Rake::TestTask.new(tasks: "test:prepare") do |t|
+    t.libs << "test"
+    t.pattern = "test/tasks/**/*_test.rb"
+  end
+end
diff --git a/services/api/script/arvados-git-sync.rb b/services/api/script/arvados-git-sync.rb
new file mode 100755 (executable)
index 0000000..3a8ed27
--- /dev/null
@@ -0,0 +1,266 @@
+#!/usr/bin/env ruby
+
+require 'rubygems'
+require 'pp'
+require 'arvados'
+require 'tempfile'
+require 'yaml'
+require 'fileutils'
+
+# This script does the actual gitolite config management on disk.
+#
+# Ward Vandewege <ward@curoverse.com>
+
+# Default is development
+production = ARGV[0] == "production"
+
+ENV["RAILS_ENV"] = "development"
+ENV["RAILS_ENV"] = "production" if production
+
+DEBUG = 1
+
+# load and merge in the environment-specific application config info
+# if present, overriding base config parameters as specified
+path = File.absolute_path('../../config/arvados-clients.yml', __FILE__)
+if File.exists?(path) then
+  cp_config = YAML.load_file(path)[ENV['RAILS_ENV']]
+else
+  puts "Please create a\n #{path}\n file"
+  exit 1
+end
+
+gitolite_url = cp_config['gitolite_url']
+gitolite_arvados_git_user_key = cp_config['gitolite_arvados_git_user_key']
+
+gitolite_tmpdir = cp_config['gitolite_tmp']
+gitolite_admin = File.join(gitolite_tmpdir, 'gitolite-admin')
+gitolite_admin_keydir = File.join(gitolite_admin, 'keydir')
+gitolite_keydir = File.join(gitolite_admin, 'keydir', 'arvados')
+
+ENV['ARVADOS_API_HOST'] = cp_config['arvados_api_host']
+ENV['ARVADOS_API_TOKEN'] = cp_config['arvados_api_token']
+if cp_config['arvados_api_host_insecure']
+  ENV['ARVADOS_API_HOST_INSECURE'] = 'true'
+else
+  ENV.delete('ARVADOS_API_HOST_INSECURE')
+end
+
+def ensure_directory(path, mode)
+  begin
+    Dir.mkdir(path, mode)
+  rescue Errno::EEXIST
+  end
+end
+
+def replace_file(path, contents)
+  unlink_now = true
+  dirname, basename = File.split(path)
+  FileUtils.mkpath(dirname)
+  new_file = Tempfile.new([basename, ".tmp"], dirname)
+  begin
+    new_file.write(contents)
+    new_file.flush
+    File.rename(new_file, path)
+    unlink_now = false
+  ensure
+    new_file.close(unlink_now)
+  end
+end
+
+def file_has_contents?(path, contents)
+  begin
+    IO.read(path) == contents
+  rescue Errno::ENOENT
+    false
+  end
+end
+
+module TrackCommitState
+  module ClassMethods
+    # Note that all classes that include TrackCommitState will have
+    # @@need_commit = true if any of them set it.  Since this flag reports
+    # a boolean state of the underlying git repository, that's OK in the
+    # current implementation.
+    @@need_commit = false
+
+    def changed?
+      @@need_commit
+    end
+
+    def ensure_in_git(path, contents)
+      unless file_has_contents?(path, contents)
+        replace_file(path, contents)
+        system("git", "add", path)
+        @@need_commit = true
+      end
+    end
+  end
+
+  def ensure_in_git(path, contents)
+    self.class.ensure_in_git(path, contents)
+  end
+
+  def self.included(base)
+    base.extend(ClassMethods)
+  end
+end
+
+class UserSSHKeys
+  include TrackCommitState
+
+  def initialize(user_keys_map, key_dir)
+    @user_keys_map = user_keys_map
+    @key_dir = key_dir
+    @installed = {}
+  end
+
+  def install(filename, pubkey)
+    unless pubkey.nil?
+      key_path = File.join(@key_dir, filename)
+      ensure_in_git(key_path, pubkey)
+    end
+    @installed[filename] = true
+  end
+
+  def ensure_keys_for_user(user_uuid)
+    return unless key_list = @user_keys_map.delete(user_uuid)
+    key_list.map { |k| k[:public_key] }.compact.each_with_index do |pubkey, ii|
+      # Handle putty-style ssh public keys
+      pubkey.sub!(/^(Comment: "r[^\n]*\n)(.*)$/m,'ssh-rsa \2 \1')
+      pubkey.sub!(/^(Comment: "d[^\n]*\n)(.*)$/m,'ssh-dss \2 \1')
+      pubkey.gsub!(/\n/,'')
+      pubkey.strip!
+      install("#{user_uuid}@#{ii}.pub", pubkey)
+    end
+  end
+
+  def installed?(filename)
+    @installed[filename]
+  end
+end
+
+class Repository
+  include TrackCommitState
+
+  @@aliases = {}
+
+  def initialize(arv_repo, user_keys)
+    @arv_repo = arv_repo
+    @user_keys = user_keys
+  end
+
+  def self.ensure_system_config(conf_root)
+    ensure_in_git(File.join(conf_root, "conf", "gitolite.conf"),
+                  %Q{include "auto/*.conf"\ninclude "admin/*.conf"\n})
+    ensure_in_git(File.join(conf_root, "arvadosaliases.pl"), alias_config)
+
+    conf_path = File.join(conf_root, "conf", "admin", "arvados.conf")
+    conf_file = %Q{
+@arvados_git_user = arvados_git_user
+
+repo gitolite-admin
+     RW           = @arvados_git_user
+
+}
+    ensure_directory(File.dirname(conf_path), 0755)
+    ensure_in_git(conf_path, conf_file)
+  end
+
+  def ensure_config(conf_root)
+    if name and (File.exist?(auto_conf_path(conf_root, name)))
+      # This gitolite installation knows the repository by name, rather than
+      # UUID.  Leave it configured that way until a separate migration is run.
+      basename = name
+    else
+      basename = uuid
+      @@aliases[name] = uuid unless name.nil?
+    end
+    conf_file = "\nrepo #{basename}\n"
+    @arv_repo[:user_permissions].sort.each do |user_uuid, perm|
+      conf_file += "\t#{perm[:gitolite_permissions]}\t= #{user_uuid}\n"
+      @user_keys.ensure_keys_for_user(user_uuid)
+    end
+    ensure_in_git(auto_conf_path(conf_root, basename), conf_file)
+  end
+
+  private
+
+  def auto_conf_path(conf_root, basename)
+    File.join(conf_root, "conf", "auto", "#{basename}.conf")
+  end
+
+  def uuid
+    @arv_repo[:uuid]
+  end
+
+  def name
+    if @arv_repo[:name].nil?
+      nil
+    else
+      @clean_name ||=
+        @arv_repo[:name].sub(/^[^A-Za-z]+/, "").gsub(/[^\w\.\/]/, "")
+    end
+  end
+
+  def self.alias_config
+    conf_s = "{\n"
+    @@aliases.sort.each do |(repo_name, repo_uuid)|
+      conf_s += "\t'#{repo_name}' \t=> '#{repo_uuid}',\n"
+    end
+    conf_s += "};\n"
+    conf_s
+  end
+end
+
+begin
+  # Get our local gitolite-admin repo up to snuff
+  if not File.exists?(gitolite_admin) then
+    ensure_directory(gitolite_tmpdir, 0700)
+    Dir.chdir(gitolite_tmpdir)
+    `git clone #{gitolite_url}`
+    Dir.chdir(gitolite_admin)
+  else
+    Dir.chdir(gitolite_admin)
+    `git pull`
+  end
+
+  arv = Arvados.new
+  permissions = arv.repository.get_all_permissions
+
+  ensure_directory(gitolite_keydir, 0700)
+  admin_user_ssh_keys = UserSSHKeys.new(permissions[:user_keys], gitolite_admin_keydir)
+  # Make sure the arvados_git_user key is installed; put it in gitolite_admin_keydir
+  # because that is where gitolite will try to put it if we do not.
+  admin_user_ssh_keys.install('arvados_git_user.pub', gitolite_arvados_git_user_key)
+
+  user_ssh_keys = UserSSHKeys.new(permissions[:user_keys], gitolite_keydir)
+  permissions[:repositories].each do |repo_record|
+    repo = Repository.new(repo_record, user_ssh_keys)
+    repo.ensure_config(gitolite_admin)
+  end
+  Repository.ensure_system_config(gitolite_admin)
+
+  # Clean up public key files that should not be present
+  Dir.chdir(gitolite_keydir)
+  stale_keys = Dir.glob('*.pub').reject do |key_file|
+    user_ssh_keys.installed?(key_file)
+  end
+  if stale_keys.any?
+    stale_keys.each { |key_file| puts "Extra file #{key_file}" }
+    system("git", "rm", "--quiet", *stale_keys)
+  end
+
+  if UserSSHKeys.changed? or Repository.changed? or stale_keys.any?
+    message = "#{Time.now().to_s}: update from API"
+    Dir.chdir(gitolite_admin)
+    `git add --all`
+    `git commit -m '#{message}'`
+    `git push`
+  end
+
+rescue => bang
+  puts "Error: " + bang.to_s
+  puts bang.backtrace.join("\n")
+  exit 1
+end
+
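Note: ensure_keys_for_user above rewrites PuTTY-format public keys into OpenSSH one-line form before installing them in the keydir. A standalone sketch of that normalization (the key material is fake):

    pubkey = "Comment: \"rsa-key-20150101\"\nAAAAB3NzaC1yc2EAAAADAQAB"
    pubkey.sub!(/^(Comment: "r[^\n]*\n)(.*)$/m, 'ssh-rsa \2 \1')  # move comment after key body
    pubkey.gsub!(/\n/, '')
    pubkey.strip!
    puts pubkey  # => ssh-rsa AAAAB3NzaC1yc2EAAAADAQAB Comment: "rsa-key-20150101"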
index ab4f70e60bd1115a87a47eeed64a544859bf49fd..27cb82115b2f7993bce20b7a589bdb62eccad619 100755 (executable)
@@ -53,12 +53,18 @@ end
 class Dispatcher
   include ApplicationHelper
 
+  EXIT_TEMPFAIL = 75
+  EXIT_RETRY_UNLOCKED = 93
+  RETRY_UNLOCKED_LIMIT = 3
+
   def initialize
     @crunch_job_bin = (ENV['CRUNCH_JOB_BIN'] || `which arv-crunch-job`.strip)
     if @crunch_job_bin.empty?
       raise "No CRUNCH_JOB_BIN env var, and crunch-job not in path."
     end
 
+    @docker_bin = ENV['CRUNCH_JOB_DOCKER_BIN']
+
     @arvados_internal = Rails.configuration.git_internal_dir
     if not File.exists? @arvados_internal
       $stderr.puts `mkdir -p #{@arvados_internal.shellescape} && git init --bare #{@arvados_internal.shellescape}`
@@ -66,6 +72,7 @@ class Dispatcher
     end
 
     @repo_root = Rails.configuration.git_repositories_dir
+    @arvados_repo_path = Repository.where(name: "arvados").first.server_path
     @authorizations = {}
     @did_recently = {}
     @fetched_commits = {}
@@ -74,6 +81,8 @@ class Dispatcher
     @pipe_auth_tokens = {}
     @running = {}
     @todo = []
+    @todo_job_retries = {}
+    @job_retry_counts = Hash.new(0)
     @todo_pipelines = []
   end
 
@@ -83,7 +92,7 @@ class Dispatcher
 
   def refresh_todo
     if $options[:jobs]
-      @todo = Job.queue.select(&:repository)
+      @todo = @todo_job_retries.values + Job.queue.select(&:repository)
     end
     if $options[:pipelines]
       @todo_pipelines = PipelineInstance.queue
@@ -276,35 +285,24 @@ class Dispatcher
     @authorizations[job.uuid]
   end
 
-  def get_commit(repo_name, commit_hash)
-    # @fetched_commits[V]==true if we know commit V exists in the
-    # arvados_internal git repository.
-    if !@fetched_commits[commit_hash]
-      src_repo = File.join(@repo_root, "#{repo_name}.git")
-      if not File.exists? src_repo
-        src_repo = File.join(@repo_root, repo_name, '.git')
-        if not File.exists? src_repo
-          fail_job job, "No #{repo_name}.git or #{repo_name}/.git at #{@repo_root}"
-          return nil
-        end
-      end
-
-      # check if the commit needs to be fetched or not
-      commit_rev = stdout_s(git_cmd("rev-list", "-n1", commit_hash),
-                            err: "/dev/null")
-      unless $? == 0 and commit_rev == commit_hash
-        # commit does not exist in internal repository, so import the source repository using git fetch-pack
-        cmd = git_cmd("fetch-pack", "--no-progress", "--all", src_repo)
-        $stderr.puts "dispatch: #{cmd}"
-        $stderr.puts(stdout_s(cmd))
-        unless $? == 0
-          fail_job job, "git fetch-pack failed"
-          return nil
-        end
-      end
-      @fetched_commits[commit_hash] = true
+  def internal_repo_has_commit? sha1
+    if (not @fetched_commits[sha1] and
+        sha1 == stdout_s(git_cmd("rev-list", "-n1", sha1), err: "/dev/null") and
+        $? == 0)
+      @fetched_commits[sha1] = true
     end
-    @fetched_commits[commit_hash]
+    return @fetched_commits[sha1]
+  end
+
+  def get_commit src_repo, sha1
+    return true if internal_repo_has_commit? sha1
+
+    # commit does not exist in internal repository, so import the
+    # source repository using git fetch-pack
+    cmd = git_cmd("fetch-pack", "--no-progress", "--all", src_repo)
+    $stderr.puts "dispatch: #{cmd}"
+    $stderr.puts(stdout_s(cmd))
+    @fetched_commits[sha1] = ($? == 0)
   end
 
   def tag_commit(commit_hash, tag_name)
@@ -376,6 +374,7 @@ class Dispatcher
       if Server::Application.config.crunch_job_user
         cmd_args.unshift("sudo", "-E", "-u",
                          Server::Application.config.crunch_job_user,
+                         "LD_LIBRARY_PATH=#{ENV['LD_LIBRARY_PATH']}",
                          "PATH=#{ENV['PATH']}",
                          "PERLLIB=#{ENV['PERLLIB']}",
                          "PYTHONPATH=#{ENV['PYTHONPATH']}",
@@ -383,20 +382,56 @@ class Dispatcher
                          "GEM_PATH=#{ENV['GEM_PATH']}")
       end
 
-      ready = (get_authorization(job) and
-               get_commit(job.repository, job.script_version) and
-               tag_commit(job.script_version, job.uuid))
-      if ready and job.arvados_sdk_version
-        ready = (get_commit("arvados", job.arvados_sdk_version) and
-                 tag_commit(job.arvados_sdk_version, "#{job.uuid}-arvados-sdk"))
+      next unless get_authorization job
+
+      ready = internal_repo_has_commit? job.script_version
+
+      if not ready
+        # Import the commit from the specified repository into the
+        # internal repository. This should have been done already when
+        # the job was created/updated; this code is obsolete except to
+        # avoid deployment races. Failing the job would be a
+        # reasonable thing to do at this point.
+        repo = Repository.where(name: job.repository).first
+        if repo.nil? or repo.server_path.nil?
+          fail_job "Repository #{job.repository} not found under #{@repo_root}"
+          next
+        end
+        ready &&= get_commit repo.server_path, job.script_version
+        ready &&= tag_commit job.script_version, job.uuid
+      end
+
+      # This should be unnecessary, because API server does it during
+      # job create/update, but it's still not a bad idea to verify the
+      # tag is correct before starting the job:
+      ready &&= tag_commit job.script_version, job.uuid
+
+      # The arvados_sdk_version doesn't support use of arbitrary
+      # remote URLs, so the requested version isn't necessarily copied
+      # into the internal repository yet.
+      if job.arvados_sdk_version
+        ready &&= get_commit @arvados_repo_path, job.arvados_sdk_version
+        ready &&= tag_commit job.arvados_sdk_version, "#{job.uuid}-arvados-sdk"
+      end
+
+      if not ready
+        fail_job job, "commit not present in internal repository"
+        next
       end
-      next unless ready
 
       cmd_args += [@crunch_job_bin,
                    '--job-api-token', @authorizations[job.uuid].api_token,
                    '--job', job.uuid,
                    '--git-dir', @arvados_internal]
 
+      if @docker_bin
+        cmd_args += ['--docker-bin', @docker_bin]
+      end
+
+      if @todo_job_retries.include?(job.uuid)
+        cmd_args << "--force-unlock"
+      end
+
       $stderr.puts "dispatch: #{cmd_args.join ' '}"
 
       begin
@@ -432,6 +467,7 @@ class Dispatcher
         log_throttle_bytes_skipped: 0,
       }
       i.close
+      @todo_job_retries.delete(job.uuid)
       update_node_status
     end
   end
@@ -614,8 +650,6 @@ class Dispatcher
     return if !pid_done
 
     job_done = j_done[:job]
-    $stderr.puts "dispatch: child #{pid_done} exit"
-    $stderr.puts "dispatch: job #{job_done.uuid} end"
 
     # Ensure every last drop of stdout and stderr is consumed.
     read_pipes
@@ -632,23 +666,49 @@ class Dispatcher
 
     # Wait the thread (returns a Process::Status)
     exit_status = j_done[:wait_thr].value.exitstatus
+    exit_tempfail = exit_status == EXIT_TEMPFAIL
+
+    $stderr.puts "dispatch: child #{pid_done} exit #{exit_status}"
+    $stderr.puts "dispatch: job #{job_done.uuid} end"
 
     jobrecord = Job.find_by_uuid(job_done.uuid)
-    if exit_status != 75 and jobrecord.state == "Running"
-      # crunch-job did not return exit code 75 (see below) and left the job in
-      # the "Running" state, which means there was an unhandled error.  Fail
-      # the job.
-      jobrecord.state = "Failed"
-      if not jobrecord.save
-        $stderr.puts "dispatch: jobrecord.save failed"
+
+    if exit_status == EXIT_RETRY_UNLOCKED
+      # The job failed because all of the nodes allocated to it
+      # failed.  Only this crunch-dispatch process can retry the job:
+      # it's already locked, and there's no way to put it back in the
+      # Queued state.  Put it in our internal todo list unless the job
+      # has failed this way excessively.
+      @job_retry_counts[jobrecord.uuid] += 1
+      exit_tempfail = @job_retry_counts[jobrecord.uuid] <= RETRY_UNLOCKED_LIMIT
+      if exit_tempfail
+        @todo_job_retries[jobrecord.uuid] = jobrecord
+      else
+        $stderr.puts("dispatch: job #{jobrecord.uuid} exceeded node failure retry limit -- giving up")
+      end
+    end
+
+    if !exit_tempfail
+      @job_retry_counts.delete(jobrecord.uuid)
+      if jobrecord.state == "Running"
+        # Apparently there was an unhandled error.  That could potentially
+        # include "all allocated nodes failed" when we don't to retry
+        # because the job has already been retried RETRY_UNLOCKED_LIMIT
+        # times.  Fail the job.
+        jobrecord.state = "Failed"
+        if not jobrecord.save
+          $stderr.puts "dispatch: jobrecord.save failed"
+        end
       end
     else
-      # Don't fail the job if crunch-job didn't even get as far as
-      # starting it. If the job failed to run due to an infrastructure
+      # If the job failed to run due to an infrastructure
       # issue with crunch-job or slurm, we want the job to stay in the
       # queue. If crunch-job exited after losing a race to another
       # crunch-job process, it exits 75 and we should leave the job
-      # record alone so the winner of the race do its thing.
+      # record alone so the winner of the race can do its thing.
+      # If crunch-job exited after all of its allocated nodes failed,
+      # it exits 93, and we want to retry it later (see the
+      # EXIT_RETRY_UNLOCKED `if` block).
       #
       # There is still an unhandled race condition: If our crunch-job
       # process is about to lose a race with another crunch-job
@@ -662,7 +722,7 @@ class Dispatcher
 
     # Invalidate the per-job auth token, unless the job is still queued and we
     # might want to try it again.
-    if jobrecord.state != "Queued"
+    if jobrecord.state != "Queued" and !@todo_job_retries.include?(jobrecord.uuid)
       j_done[:job_auth].update_attributes expires_at: Time.now
     end
 
@@ -716,6 +776,14 @@ class Dispatcher
       select(@running.values.collect { |j| [j[:stdout], j[:stderr]] }.flatten,
              [], [], 1)
     end
+    # If there are jobs we wanted to retry, we have to mark them as failed now.
+    # Other dispatchers can't pick them up because we hold their lock.
+    @todo_job_retries.each_key do |job_uuid|
+      job = Job.find_by_uuid(job_uuid)
+      if job.state == "Running"
+        fail_job(job, "crunch-dispatch was stopped during job's tempfail retry loop")
+      end
+    end
   end
 
   protected
@@ -758,4 +826,10 @@ end
 # This is how crunch-job child procs know where the "refresh" trigger file is
 ENV["CRUNCH_REFRESH_TRIGGER"] = Rails.configuration.crunch_refresh_trigger
 
+# If salloc can't allocate resources immediately, make it use our temporary
+# failure exit code.  This ensures crunch-dispatch won't mark a job failed
+# because of an issue with node allocation.  This often happens when
+# another dispatcher wins the race to allocate nodes.
+ENV["SLURM_EXIT_IMMEDIATE"] = Dispatcher::EXIT_TEMPFAIL.to_s
+
 Dispatcher.new.run
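Note: the dispatcher now distinguishes two "try again" exit codes: EXIT_TEMPFAIL (75) leaves the job queued for any dispatcher, while EXIT_RETRY_UNLOCKED (93, all allocated nodes failed) can only be retried by this process, because it still holds the job's lock. A reduced sketch of the bookkeeping above (job records shrunk to fake UUID strings; the real dispatcher works with ActiveRecord models):

    EXIT_TEMPFAIL = 75
    EXIT_RETRY_UNLOCKED = 93
    RETRY_UNLOCKED_LIMIT = 3

    def handle_exit(uuid, exit_status, retry_counts, todo_retries)
      return :leave_queued if exit_status == EXIT_TEMPFAIL
      if exit_status == EXIT_RETRY_UNLOCKED
        retry_counts[uuid] += 1
        if retry_counts[uuid] <= RETRY_UNLOCKED_LIMIT
          todo_retries[uuid] = true  # only we can retry: we hold the lock
          return :retry_locally
        end
      end
      :fail
    end

    retry_counts = Hash.new(0)
    todo_retries = {}
    4.times do
      p handle_exit("zzzzz-8i9sb-fakefakefakefak", EXIT_RETRY_UNLOCKED, retry_counts, todo_retries)
    end
    # => :retry_locally three times, then :fail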
diff --git a/services/api/script/migrate-gitolite-to-uuid-storage.rb b/services/api/script/migrate-gitolite-to-uuid-storage.rb
new file mode 100755 (executable)
index 0000000..8db1a0e
--- /dev/null
@@ -0,0 +1,220 @@
+#!/usr/bin/env ruby
+#
+# Prior to April 2015, Arvados Gitolite integration stored repositories by
+# name.  To improve user repository management, we switched to storing
+# repositories by UUID, and aliasing them to names.  This makes it easy to
+# have rich name hierarchies, and allow users to rename repositories.
+#
+# This script will migrate a name-based Gitolite configuration to a UUID-based
+# one.  To use it:
+#
+# 1. Change the value of REPOS_DIR below, if needed.
+# 2. Install this script in the same directory as `update-gitolite.rb`.
+# 3. Ensure that no *other* users can access Gitolite: edit gitolite's
+#    authorized_keys file so it only contains the arvados_git_user key,
+#    and disable the update-gitolite cron job.
+# 4. Run this script: `ruby migrate-gitolite-to-uuid-storage.rb production`.
+# 5. Undo step 3.
+
+require 'rubygems'
+require 'pp'
+require 'arvados'
+require 'tempfile'
+require 'yaml'
+
+REPOS_DIR = "/var/lib/gitolite/repositories"
+
+# Default is development
+production = ARGV[0] == "production"
+
+ENV["RAILS_ENV"] = "development"
+ENV["RAILS_ENV"] = "production" if production
+
+DEBUG = 1
+
+# load and merge in the environment-specific application config info
+# if present, overriding base config parameters as specified
+path = File.dirname(__FILE__) + '/config/arvados-clients.yml'
+if File.exists?(path) then
+  cp_config = YAML.load_file(path)[ENV['RAILS_ENV']]
+else
+  puts "Please create a\n " + File.dirname(__FILE__) + "/config/arvados-clients.yml\n file"
+  exit 1
+end
+
+gitolite_url = cp_config['gitolite_url']
+gitolite_arvados_git_user_key = cp_config['gitolite_arvados_git_user_key']
+
+gitolite_tmpdir = File.join(File.absolute_path(File.dirname(__FILE__)),
+                            cp_config['gitolite_tmp'])
+gitolite_admin = File.join(gitolite_tmpdir, 'gitolite-admin')
+gitolite_keydir = File.join(gitolite_admin, 'keydir', 'arvados')
+
+ENV['ARVADOS_API_HOST'] = cp_config['arvados_api_host']
+ENV['ARVADOS_API_TOKEN'] = cp_config['arvados_api_token']
+if cp_config['arvados_api_host_insecure']
+  ENV['ARVADOS_API_HOST_INSECURE'] = 'true'
+else
+  ENV.delete('ARVADOS_API_HOST_INSECURE')
+end
+
+def ensure_directory(path, mode)
+  begin
+    Dir.mkdir(path, mode)
+  rescue Errno::EEXIST
+  end
+end
+
+def replace_file(path, contents)
+  unlink_now = true
+  dirname, basename = File.split(path)
+  new_file = Tempfile.new([basename, ".tmp"], dirname)
+  begin
+    new_file.write(contents)
+    new_file.flush
+    File.rename(new_file, path)
+    unlink_now = false
+  ensure
+    new_file.close(unlink_now)
+  end
+end
+
+def file_has_contents?(path, contents)
+  begin
+    IO.read(path) == contents
+  rescue Errno::ENOENT
+    false
+  end
+end
+
+module TrackCommitState
+  module ClassMethods
+    # Note that all classes that include TrackCommitState will have
+    # @@need_commit = true if any of them set it.  Since this flag reports
+    # a boolean state of the underlying git repository, that's OK in the
+    # current implementation.
+    @@need_commit = false
+
+    def changed?
+      @@need_commit
+    end
+
+    def ensure_in_git(path, contents)
+      unless file_has_contents?(path, contents)
+        replace_file(path, contents)
+        system("git", "add", path)
+        @@need_commit = true
+      end
+    end
+  end
+
+  def ensure_in_git(path, contents)
+    self.class.ensure_in_git(path, contents)
+  end
+
+  def self.included(base)
+    base.extend(ClassMethods)
+  end
+end
+
+class Repository
+  include TrackCommitState
+
+  @@aliases = {}
+
+  def initialize(arv_repo)
+    @arv_repo = arv_repo
+  end
+
+  def self.ensure_system_config(conf_root)
+    ensure_in_git(File.join(conf_root, "arvadosaliases.pl"), alias_config)
+  end
+
+  def self.rename_repos(repos_root)
+    @@aliases.each_pair do |uuid, name|
+      begin
+        File.rename(File.join(repos_root, "#{name}.git/"),
+                    File.join(repos_root, "#{uuid}.git"))
+      rescue Errno::ENOENT
+      end
+      if name == "arvados"
+        Dir.chdir(repos_root) { File.symlink("#{uuid}.git/", "arvados.git") }
+      end
+    end
+  end
+
+  def ensure_config(conf_root)
+    return if name.nil?
+    @@aliases[uuid] = name
+    name_conf_path = auto_conf_path(conf_root, name)
+    return unless File.exist?(name_conf_path)
+    conf_file = IO.read(name_conf_path)
+    conf_file.gsub!(/^repo #{Regexp.escape(name)}$/m, "repo #{uuid}")
+    ensure_in_git(auto_conf_path(conf_root, uuid), conf_file)
+    File.unlink(name_conf_path)
+    system("git", "rm", "--quiet", name_conf_path)
+  end
+
+  private
+
+  def auto_conf_path(conf_root, basename)
+    File.join(conf_root, "conf", "auto", "#{basename}.conf")
+  end
+
+  def uuid
+    @arv_repo[:uuid]
+  end
+
+  def name
+    if @arv_repo[:name].nil?
+      nil
+    else
+      @clean_name ||=
+        @arv_repo[:name].sub(/^[^A-Za-z]+/, "").gsub(/[^\w\.\/]/, "")
+    end
+  end
+
+  def self.alias_config
+    conf_s = "{\n"
+    @@aliases.sort.each do |(repo_name, repo_uuid)|
+      conf_s += "\t'#{repo_name}' \t=> '#{repo_uuid}',\n"
+    end
+    conf_s += "};\n"
+    conf_s
+  end
+end
+
+begin
+  # Get our local gitolite-admin repo up to snuff
+  if not File.exists?(gitolite_admin) then
+    ensure_directory(gitolite_tmpdir, 0700)
+    Dir.chdir(gitolite_tmpdir)
+    `git clone #{gitolite_url}`
+    Dir.chdir(gitolite_admin)
+  else
+    Dir.chdir(gitolite_admin)
+    `git pull`
+  end
+
+  arv = Arvados.new
+  permissions = arv.repository.get_all_permissions
+
+  permissions[:repositories].each do |repo_record|
+    repo = Repository.new(repo_record)
+    repo.ensure_config(gitolite_admin)
+  end
+  Repository.ensure_system_config(gitolite_admin)
+
+  message = "#{Time.now().to_s}: migrate to storing repositories by UUID"
+  Dir.chdir(gitolite_admin)
+  `git add --all`
+  `git commit -m '#{message}'`
+  Repository.rename_repos(REPOS_DIR)
+  `git push`
+
+rescue => bang
+  puts "Error: " + bang.to_s
+  puts bang.backtrace.join("\n")
+  exit 1
+end
+
diff --git a/services/api/script/salvage_collection.rb b/services/api/script/salvage_collection.rb
new file mode 100755 (executable)
index 0000000..b70807b
--- /dev/null
@@ -0,0 +1,26 @@
+#!/usr/bin/env ruby
+
+# Take two input parameters: a collection uuid and reason
+# Get "src_collection" with the given uuid
+# Create a new collection with:
+#   src_collection.manifest_text as "invalid_manifest_text.txt"
+#   Locators from src_collection.manifest_text as "salvaged_data"
+# Update src_collection:
+#   Set src_collection.manifest_text to: ""
+#   Append to src_collection.name: " (reason; salvaged data at new_collection.uuid)"
+#   Set portable_data_hash to "d41d8cd98f00b204e9800998ecf8427e+0"
+
+require 'trollop'
+require './lib/salvage_collection'
+
+opts = Trollop::options do
+  banner ''
+  banner "Usage: salvage_collection.rb " +
+    "{uuid} {reason}"
+  banner ''
+  opt :uuid, "uuid of the collection to be salvaged.", type: :string, required: true
+  opt :reason, "Reason for salvaging.", type: :string, required: false
+end
+
+# Salvage the collection with the given uuid
+SalvageCollection.salvage_collection opts.uuid, opts.reason
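Note: with Trollop generating the flags from the opt declarations above, a typical invocation would look like `bundle exec script/salvage_collection.rb --uuid <collection-uuid> --reason "bad manifest"`, run from the API server directory with admin credentials in the ARVADOS_API_* environment variables (exact invocation details depend on the deployment).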
index 0b4d8747ea35bdd66b66182215d821058b917dfe..9199d178f6bcdfec3c8536d8da9f7e6b22613898 100644 (file)
@@ -1,5 +1,11 @@
 # Read about fixtures at http://api.rubyonrails.org/classes/ActiveRecord/Fixtures.html
 
+system_user:
+  api_client: untrusted
+  user: system_user
+  api_token: systemusertesttoken1234567890aoeuidhtnsqjkxbmwvzpy
+  expires_at: 2038-01-01 00:00:00
+
 admin:
   api_client: untrusted
   user: admin
@@ -181,12 +187,18 @@ active_no_prefs:
   api_token: 3kg612cdc0f3415c2428b9758f33bdfb07bc3561b00e86qdmi
   expires_at: 2038-01-01 00:00:00
 
-active_no_prefs_profile:
+active_no_prefs_profile_no_getting_started_shown:
   api_client: untrusted
-  user: active_no_prefs_profile
+  user: active_no_prefs_profile_no_getting_started_shown
   api_token: 3kg612cdc0f3415c242856758f33bdfb07bc3561b00e86qdmi
   expires_at: 2038-01-01 00:00:00
 
+active_no_prefs_profile_with_getting_started_shown:
+  api_client: untrusted
+  user: active_no_prefs_profile_with_getting_started_shown
+  api_token: 3kg612cdc0f3415c245786758f33bdfb07babcd1b00e86qdmi
+  expires_at: 2038-01-01 00:00:00
+
 user_foo_in_sharing_group:
   api_client: untrusted
   user: user_foo_in_sharing_group
index c42a4306c6a19d6288049f18537633b26e405337..d7f6f92f186341d9da550ff6b4e95f132787f04f 100644 (file)
@@ -79,7 +79,7 @@ multilevel_collection_1:
   modified_by_user_uuid: zzzzz-tpzed-d9tiejq69daie8f
   modified_at: 2014-02-03T17:22:54Z
   updated_at: 2014-02-03T17:22:54Z
-  manifest_text: ". 0:0:file1 0:0:file2 0:0:file3\n./dir1 0:0:file1 0:0:file2 0:0:file3\n./dir1/subdir 0:0:file1 0:0:file2 0:0:file3\n./dir2 0:0:file1 0:0:file2 0:0:file3\n"
+  manifest_text: ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:file1 0:0:file2 0:0:file3\n./dir1 d41d8cd98f00b204e9800998ecf8427e+0 0:0:file1 0:0:file2 0:0:file3\n./dir1/subdir d41d8cd98f00b204e9800998ecf8427e+0 0:0:file1 0:0:file2 0:0:file3\n./dir2 d41d8cd98f00b204e9800998ecf8427e+0 0:0:file1 0:0:file2 0:0:file3\n"
   name: multilevel_collection_1
 
 multilevel_collection_2:
index 7b4f8be6dc8d2fcaf6f5acbf36d96b339c3a3385..f6b99a06617a860d9d4c6681b60c2861b426d4e1 100644 (file)
@@ -127,6 +127,14 @@ anonymously_accessible_project:
   group_class: project
   description: An anonymously accessible project
 
+subproject_in_anonymous_accessible_project:
+  uuid: zzzzz-j7d0g-mhtfesvgmkolpyf
+  owner_uuid: zzzzz-j7d0g-zhxawtyetzwc5f0
+  created_at: 2014-04-21 15:37:48 -0400
+  name: Subproject in anonymous accessible project
+  description: Description for subproject in anonymous accessible project
+  group_class: project
+
 active_user_has_can_manage:
   uuid: zzzzz-j7d0g-ptt1ou6a9lxrv07
   owner_uuid: zzzzz-tpzed-d9tiejq69daie8f
@@ -248,3 +256,29 @@ project_owns_itself:
   description: ~
   updated_at: 2014-11-05 22:31:24.258093171 Z
   group_class: project
+
+# Used to test renaming when an object is removed from "asubproject" while
+# another object with the same name exists in the home project.
+subproject_in_active_user_home_project_to_test_unique_key_violation:
+  uuid: zzzzz-j7d0g-subprojsamenam1
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  created_at: 2013-04-21 15:37:48 -0400
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  modified_at: 2013-04-21 15:37:48 -0400
+  updated_at: 2013-04-21 15:37:48 -0400
+  name: Subproject to test owner uuid and name unique key violation upon removal
+  description: Subproject in active user home project to test owner uuid and name unique key violation upon removal
+  group_class: project
+
+subproject_in_asubproject_with_same_name_as_one_in_active_user_home:
+  uuid: zzzzz-j7d0g-subprojsamenam2
+  owner_uuid: zzzzz-j7d0g-axqo7eu9pwvna1x
+  created_at: 2013-04-21 15:37:48 -0400
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  modified_at: 2013-04-21 15:37:48 -0400
+  updated_at: 2013-04-21 15:37:48 -0400
+  name: Subproject to test owner uuid and name unique key violation upon removal
+  description: "Removing this will result in name conflict with 'A project' in Home project and hence get renamed."
+  group_class: project
diff --git a/services/api/test/fixtures/humans.yml b/services/api/test/fixtures/humans.yml
new file mode 100644 (file)
index 0000000..9c17fd2
--- /dev/null
@@ -0,0 +1 @@
+# This file exists to ensure the table gets cleared during DatabaseController#reset
index 78120042aa625bec49a4c5bdf27f0abeb00f572b..8a4c34557c4e45c3cf1e0ae22a675901e8b3754e 100644 (file)
@@ -7,6 +7,8 @@ running:
   created_at: <%= 3.minute.ago.to_s(:db) %>
   started_at: <%= 3.minute.ago.to_s(:db) %>
   finished_at: ~
+  script: hash
+  repository: active/foo
   script_version: 1de84a854e2b440dc53bf42f8548afa4c17da332
   running: true
   success: ~
@@ -31,6 +33,8 @@ running_cancelled:
   created_at: <%= 4.minute.ago.to_s(:db) %>
   started_at: <%= 3.minute.ago.to_s(:db) %>
   finished_at: ~
+  script: hash
+  repository: active/foo
   script_version: 1de84a854e2b440dc53bf42f8548afa4c17da332
   running: true
   success: ~
@@ -56,6 +60,8 @@ uses_nonexistent_script_version:
   created_at: <%= 5.minute.ago.to_s(:db) %>
   started_at: <%= 3.minute.ago.to_s(:db) %>
   finished_at: <%= 2.minute.ago.to_s(:db) %>
+  script: hash
+  repository: active/foo
   running: false
   success: true
   output: d41d8cd98f00b204e9800998ecf8427e+0
@@ -77,7 +83,7 @@ foobar:
   cancelled_by_user_uuid: ~
   cancelled_by_client_uuid: ~
   script: hash
-  repository: foo
+  repository: active/foo
   script_version: 7def43a4d3f20789dda4700f703b5514cc3ed250
   script_parameters:
     input: 1f4b0bc7583c2a7f9102c395f4ffc5e3+45
@@ -113,7 +119,7 @@ barbaz:
   finished_at: <%= 2.minute.ago.to_s(:db) %>
   running: false
   success: true
-  repository: foo
+  repository: active/foo
   output: ea10d51bcf88862dbcc36eb292017dfd+45
   priority: 0
   log: d41d8cd98f00b204e9800998ecf8427e+0
@@ -141,7 +147,7 @@ runningbarbaz:
   finished_at: <%= 2.minute.ago.to_s(:db) %>
   running: true
   success: ~
-  repository: foo
+  repository: active/foo
   output: ea10d51bcf88862dbcc36eb292017dfd+45
   priority: 0
   log: d41d8cd98f00b204e9800998ecf8427e+0
@@ -157,14 +163,32 @@ runningbarbaz:
 previous_job_run:
   uuid: zzzzz-8i9sb-cjs4pklxxjykqqq
   created_at: <%= 14.minute.ago.to_s(:db) %>
+  finished_at: <%= 13.minutes.ago.to_s(:db) %>
   owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
-  repository: foo
+  repository: active/foo
   script: hash
   script_version: 4fe459abe02d9b365932b8f5dc419439ab4e2577
   script_parameters:
     input: fa7aeb5140e2848d39b416daeef4ffc5+45
     an_integer: "1"
   success: true
+  log: d41d8cd98f00b204e9800998ecf8427e+0
+  output: ea10d51bcf88862dbcc36eb292017dfd+45
+  state: Complete
+
+previous_ancient_job_run:
+  uuid: zzzzz-8i9sb-ahd7cie8jah9qui
+  created_at: <%= 366.days.ago.to_s(:db) %>
+  finished_at: <%= 365.days.ago.to_s(:db) %>
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  repository: active/foo
+  script: hash
+  script_version: 4fe459abe02d9b365932b8f5dc419439ab4e2577
+  script_parameters:
+    input: fa7aeb5140e2848d39b416daeef4ffc5+45
+    an_integer: "2"
+  success: true
+  log: d41d8cd98f00b204e9800998ecf8427e+0
   output: ea10d51bcf88862dbcc36eb292017dfd+45
   state: Complete
 
@@ -172,24 +196,41 @@ previous_docker_job_run:
   uuid: zzzzz-8i9sb-k6emstgk4kw4yhi
   created_at: <%= 14.minute.ago.to_s(:db) %>
   owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
-  repository: foo
+  repository: active/foo
   script: hash
   script_version: 4fe459abe02d9b365932b8f5dc419439ab4e2577
   script_parameters:
     input: fa7aeb5140e2848d39b416daeef4ffc5+45
     an_integer: "1"
   runtime_constraints:
-    docker_image: arvados/test
+    docker_image: arvados/apitestfixture
   success: true
   output: ea10d51bcf88862dbcc36eb292017dfd+45
   docker_image_locator: fa3c1a9cb6783f85f2ecda037e07b8c3+167
   state: Complete
 
+previous_ancient_docker_image_job_run:
+  uuid: zzzzz-8i9sb-t3b460aolxxuldl
+  created_at: <%= 144.minute.ago.to_s(:db) %>
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  repository: active/foo
+  script: hash
+  script_version: 4fe459abe02d9b365932b8f5dc419439ab4e2577
+  script_parameters:
+    input: fa7aeb5140e2848d39b416daeef4ffc5+45
+    an_integer: "2"
+  runtime_constraints:
+    docker_image: arvados/apitestfixture
+  success: true
+  output: ea10d51bcf88862dbcc36eb292017dfd+45
+  docker_image_locator: b519d9cb706a29fc7ea24dbea2f05851+93
+  state: Complete
+
 previous_job_run_with_arvados_sdk_version:
   uuid: zzzzz-8i9sb-eoo0321or2dw2jg
   created_at: <%= 14.minute.ago.to_s(:db) %>
   owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
-  repository: foo
+  repository: active/foo
   script: hash
   script_version: 31ce37fe365b3dc204300a3e4c396ad333ed0556
   script_parameters:
@@ -197,7 +238,9 @@ previous_job_run_with_arvados_sdk_version:
     an_integer: "1"
   runtime_constraints:
     arvados_sdk_version: commit2
+    docker_image: arvados/apitestfixture
   arvados_sdk_version: 00634b2b8a492d6f121e3cf1d6587b821136a9a7
+  docker_image_locator: fa3c1a9cb6783f85f2ecda037e07b8c3+167
   success: true
   output: ea10d51bcf88862dbcc36eb292017dfd+45
   state: Complete
@@ -206,7 +249,7 @@ previous_job_run_no_output:
   uuid: zzzzz-8i9sb-cjs4pklxxjykppp
   created_at: <%= 14.minute.ago.to_s(:db) %>
   owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
-  repository: foo
+  repository: active/foo
   script: hash
   script_version: 4fe459abe02d9b365932b8f5dc419439ab4e2577
   script_parameters:
@@ -216,11 +259,25 @@ previous_job_run_no_output:
   output: ~
   state: Complete
 
+previous_job_run_superseded_by_hash_branch:
+  # This supplied_script_version is a branch name with later commits.
+  uuid: zzzzz-8i9sb-aeviezu5dahph3e
+  created_at: <%= 15.minute.ago.to_s(:db) %>
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  repository: active/shabranchnames
+  script: testscript
+  script_version: 7387838c69a21827834586cc42b467ff6c63293b
+  supplied_script_version: 738783
+  script_parameters: {}
+  success: true
+  output: d41d8cd98f00b204e9800998ecf8427e+0
+  state: Complete
+
 nondeterminisic_job_run:
   uuid: zzzzz-8i9sb-cjs4pklxxjykyyy
   created_at: <%= 14.minute.ago.to_s(:db) %>
   owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
-  repository: foo
+  repository: active/foo
   script: hash2
   script_version: 4fe459abe02d9b365932b8f5dc419439ab4e2577
   script_parameters:
@@ -311,7 +368,7 @@ job_in_subproject:
   created_at: 2014-10-15 12:00:00
   owner_uuid: zzzzz-j7d0g-axqo7eu9pwvna1x
   log: ~
-  repository: foo
+  repository: active/foo
   script: hash
   script_version: 4fe459abe02d9b365932b8f5dc419439ab4e2577
   state: Complete
@@ -343,7 +400,7 @@ running_will_be_completed:
 graph_stage1:
   uuid: zzzzz-8i9sb-graphstage10000
   owner_uuid: zzzzz-j7d0g-v955i6s2oi1cbso
-  repository: foo
+  repository: active/foo
   script: hash
   script_version: 4fe459abe02d9b365932b8f5dc419439ab4e2577
   state: Complete
@@ -352,7 +409,7 @@ graph_stage1:
 graph_stage2:
   uuid: zzzzz-8i9sb-graphstage20000
   owner_uuid: zzzzz-j7d0g-v955i6s2oi1cbso
-  repository: foo
+  repository: active/foo
   script: hash2
   script_version: 4fe459abe02d9b365932b8f5dc419439ab4e2577
   state: Complete
@@ -364,7 +421,7 @@ graph_stage2:
 graph_stage3:
   uuid: zzzzz-8i9sb-graphstage30000
   owner_uuid: zzzzz-j7d0g-v955i6s2oi1cbso
-  repository: foo
+  repository: active/foo
   script: hash2
   script_version: 4fe459abe02d9b365932b8f5dc419439ab4e2577
   state: Complete
@@ -380,7 +437,7 @@ job_with_latest_version:
   cancelled_by_user_uuid: ~
   cancelled_by_client_uuid: ~
   script: hash
-  repository: foo
+  repository: active/foo
   script_version: 7def43a4d3f20789dda4700f703b5514cc3ed250
   supplied_script_version: master
   script_parameters:
@@ -406,7 +463,7 @@ running_job_in_publicly_accessible_project:
   uuid: zzzzz-8i9sb-n7omg50bvt0m1nf
   owner_uuid: zzzzz-j7d0g-zhxawtyetzwc5f0
   modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
-  repository: foo
+  repository: active/foo
   script: running_job_script
   script_version: 4fe459abe02d9b365932b8f5dc419439ab4e2577
   state: Running
@@ -418,13 +475,29 @@ completed_job_in_publicly_accessible_project:
   uuid: zzzzz-8i9sb-jyq01m7in1jlofj
   owner_uuid: zzzzz-j7d0g-zhxawtyetzwc5f0
   modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
-  repository: foo
+  repository: active/foo
+  script: completed_job_script
+  script_version: 4fe459abe02d9b365932b8f5dc419439ab4e2577
+  state: Complete
+  script_parameters:
+    input: fa7aeb5140e2848d39b416daeef4ffc5+45
+    input2: "stuff2"
+  log: ~
+  output: b519d9cb706a29fc7ea24dbea2f05851+93
+
+job_in_publicly_accessible_project_but_other_objects_elsewhere:
+  uuid: zzzzz-8i9sb-jyq01muyhgr4ofj
+  owner_uuid: zzzzz-j7d0g-zhxawtyetzwc5f0
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  repository: active/foo
   script: completed_job_script
   script_version: 4fe459abe02d9b365932b8f5dc419439ab4e2577
   state: Complete
   script_parameters:
     input: fa7aeb5140e2848d39b416daeef4ffc5+45
     input2: "stuff2"
+  log: zzzzz-4zz18-fy296fx3hot09f7
+  output: zzzzz-4zz18-bv31uwvy3neko21
 
 # Test Helper trims the rest of the file
 
index b8856efd38cea9d0e32677a87ceff1a8877a0077..925e4661248279b1543052fd8f8cc563e5efc8d8 100644 (file)
@@ -405,6 +405,20 @@ admin_can_write_aproject:
   head_uuid: zzzzz-j7d0g-v955i6s2oi1cbso
   properties: {}
 
+project_viewer_member_of_all_users_group:
+  uuid: zzzzz-o0j2j-cdnq6627g0h0r2x
+  owner_uuid: zzzzz-tpzed-000000000000000
+  created_at: 2015-07-28T21:34:41.361747000Z
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-000000000000000
+  modified_at: 2015-07-28T21:34:41.361747000Z
+  updated_at: 2015-07-28T21:34:41.361747000Z
+  tail_uuid: zzzzz-tpzed-projectviewer1a
+  link_class: permission
+  name: can_read
+  head_uuid: zzzzz-j7d0g-fffffffffffffff
+  properties: {}
+
 project_viewer_can_read_project:
   uuid: zzzzz-o0j2j-projviewerreadp
   owner_uuid: zzzzz-tpzed-000000000000000
@@ -643,6 +657,36 @@ ancient_docker_image_collection_hash:
   properties:
     image_timestamp: "2010-06-10T14:30:00.184019565Z"
 
+ancient_docker_image_collection_tag:
+  uuid: zzzzz-o0j2j-dockercolltagzz
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  created_at: 2014-06-12 14:30:00.184389725 Z
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-000000000000000
+  modified_at: 2014-06-12 14:30:00.184019565 Z
+  updated_at: 2014-06-12 14:30:00.183829316 Z
+  link_class: docker_image_repo+tag
+  name: arvados/apitestfixture:latest
+  tail_uuid: ~
+  head_uuid: zzzzz-4zz18-t68oksiu9m80s4y
+  properties:
+    image_timestamp: "2010-06-10T14:30:00.184019565Z"
+
+docker_image_tag_like_hash:
+  uuid: zzzzz-o0j2j-dockerhashtagaa
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  created_at: 2014-06-11 14:30:00.184389725 Z
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-000000000000000
+  modified_at: 2014-06-11 14:30:00.184019565 Z
+  updated_at: 2014-06-11 14:30:00.183829316 Z
+  link_class: docker_image_repo+tag
+  name: aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa:latest
+  tail_uuid: ~
+  head_uuid: zzzzz-4zz18-1v45jub259sjjgb
+  properties:
+    image_timestamp: "2014-06-10T14:30:00.184019565Z"
+
 job_reader_can_read_previous_job_run:
   # Permission link giving job_reader permission
   # to read previous_job_run
@@ -773,6 +817,49 @@ auto_setup_vm_login_username_can_login_to_test_vm:
   properties: {username: 'auto_setup_vm_login'}
   updated_at: 2014-08-06 22:11:51.242010312 Z
 
+admin_can_login_to_testvm2:
+  uuid: zzzzz-o0j2j-peek9mecohgh3ai
+  owner_uuid: zzzzz-tpzed-d9tiejq69daie8f
+  created_at: 2014-08-06 22:11:51.242392533 Z
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-d9tiejq69daie8f
+  modified_at: 2014-08-06 22:11:51.242150425 Z
+  tail_uuid: zzzzz-tpzed-d9tiejq69daie8f
+  link_class: permission
+  name: can_login
+  head_uuid: zzzzz-2x53u-382brsig8rp3065
+  # username is not obviously related to other user data.
+  properties: {username: 'adminroot', groups: ['docker', 'admin']}
+  updated_at: 2014-08-06 22:11:51.242010312 Z
+
+active_can_login_to_testvm2:
+  uuid: zzzzz-o0j2j-rah2ya1ohx9xaev
+  owner_uuid: zzzzz-tpzed-d9tiejq69daie8f
+  created_at: 2014-08-06 22:11:51.242392533 Z
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-d9tiejq69daie8f
+  modified_at: 2014-08-06 22:11:51.242150425 Z
+  tail_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  link_class: permission
+  name: can_login
+  head_uuid: zzzzz-2x53u-382brsig8rp3065
+  # No groups.
+  properties: {username: 'active'}
+  updated_at: 2014-08-06 22:11:51.242010312 Z
+
+spectator_login_link_for_testvm2_without_username:
+  uuid: zzzzz-o0j2j-aem0eilie1jigh9
+  owner_uuid: zzzzz-tpzed-d9tiejq69daie8f
+  created_at: 2014-08-06 22:11:51.242392533 Z
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-d9tiejq69daie8f
+  modified_at: 2014-08-06 22:11:51.242150425 Z
+  tail_uuid: zzzzz-tpzed-l1s2piq4t4mps8r
+  link_class: permission
+  name: can_login
+  head_uuid: zzzzz-2x53u-382brsig8rp3065
+  updated_at: 2014-08-06 22:11:51.242010312 Z
+
 user_foo_can_read_sharing_group:
   uuid: zzzzz-o0j2j-gdpvwvpj9kjs5in
   owner_uuid: zzzzz-tpzed-000000000000000
index 06f7a021d6e6edf7de2f1ae31f28cdcc55087c16..9179e6dff92a4c62a0271dd78786b98dc726fef4 100644 (file)
@@ -63,3 +63,79 @@ crunchstat_for_running_job:
   updated_at: 2014-11-07 23:33:42.347455000 Z
   modified_at: 2014-11-07 23:33:42.347455000 Z
   object_owner_uuid: zzzzz-j7d0g-v955i6s2oi1cbso
+
+log_line_for_pipeline_in_publicly_accessible_project:
+  id: 8
+  uuid: zzzzz-57u5n-tmymyrojrjyhb45
+  owner_uuid: zzzzz-j7d0g-zhxawtyetzwc5f0
+  modified_by_client_uuid: zzzzz-ozdt8-obw7foaks3qjyej
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  object_uuid: zzzzz-d1hrv-n68vc490mloy4fi
+  event_at: 2014-11-07 23:33:42.347455000 Z
+  event_type: stderr
+  summary: ~
+  properties:
+    text: '2014-11-07_23:33:41 zzzzz-d1hrv-n68vc490mloy4fi 31708 1 stderr crunchstat:
+      cpu 1935.4300 user 59.4100 sys 8 cpus -- interval 10.0002 seconds 12.9900 user
+      0.9900 sys'
+  created_at: 2014-11-07 23:33:42.351913000 Z
+  updated_at: 2014-11-07 23:33:42.347455000 Z
+  modified_at: 2014-11-07 23:33:42.347455000 Z
+  object_owner_uuid: zzzzz-j7d0g-v955i6s2oi1cbso
+
+log_line_for_pipeline_in_publicly_accessible_project_but_other_objects_elsewhere:
+  id: 9
+  uuid: zzzzz-57u5n-tmyhy56k9lnhb45
+  owner_uuid: zzzzz-j7d0g-v955i6s2oi1cbso
+  modified_by_client_uuid: zzzzz-ozdt8-obw7foaks3qjyej
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  object_uuid: zzzzz-d1hrv-pisharednotobjs
+  event_at: 2014-11-07 23:33:42.347455000 Z
+  event_type: stderr
+  summary: ~
+  properties:
+    text: '2014-11-07_23:33:41 zzzzz-d1hrv-pisharednotobjs 31708 1 stderr crunchstat:
+      cpu 1935.4300 user 59.4100 sys 8 cpus -- interval 10.0002 seconds 12.9900 user
+      0.9900 sys'
+  created_at: 2014-11-07 23:33:42.351913000 Z
+  updated_at: 2014-11-07 23:33:42.347455000 Z
+  modified_at: 2014-11-07 23:33:42.347455000 Z
+  object_owner_uuid: zzzzz-j7d0g-v955i6s2oi1cbso
+
+crunchstat_for_previous_job:
+  id: 10
+  uuid: zzzzz-57u5n-eir3aesha3kaene
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  modified_by_client_uuid: zzzzz-ozdt8-obw7foaks3qjyej
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  object_uuid: zzzzz-8i9sb-cjs4pklxxjykqqq
+  event_at: 2014-11-07 23:33:42.347455000 Z
+  event_type: stderr
+  summary: ~
+  properties:
+    text: '2014-11-07_23:33:41 zzzzz-8i9sb-cjs4pklxxjykqqq 11592 1 stderr crunchstat:
+      cpu 1935.4300 user 59.4100 sys 8 cpus -- interval 10.0002 seconds 12.9900 user
+      0.9900 sys'
+  created_at: 2014-11-07 23:33:42.351913000 Z
+  updated_at: 2014-11-07 23:33:42.347455000 Z
+  modified_at: 2014-11-07 23:33:42.347455000 Z
+  object_owner_uuid: zzzzz-j7d0g-xurymjxw79nv3jz
+
+crunchstat_for_ancient_job:
+  id: 11
+  uuid: zzzzz-57u5n-ixioph7ieb5ung8
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  modified_by_client_uuid: zzzzz-ozdt8-obw7foaks3qjyej
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  object_uuid: zzzzz-8i9sb-ahd7cie8jah9qui
+  event_at: 2013-11-07 23:33:42.347455000 Z
+  event_type: stderr
+  summary: ~
+  properties:
+    text: '2013-11-07_23:33:41 zzzzz-8i9sb-ahd7cie8jah9qui 29610 1 stderr crunchstat:
+      cpu 1935.4300 user 59.4100 sys 8 cpus -- interval 10.0002 seconds 12.9900 user
+      0.9900 sys'
+  created_at: 2013-11-07 23:33:42.351913000 Z
+  updated_at: 2013-11-07 23:33:42.347455000 Z
+  modified_at: 2013-11-07 23:33:42.347455000 Z
+  object_owner_uuid: zzzzz-j7d0g-xurymjxw79nv3jz
index 15115012fac5027c9ce8060e85cb54d297c40923..6ca22e152615c30180de4fc07a5e6d19c2b8ed12 100644 (file)
@@ -54,3 +54,27 @@ was_idle_now_down:
     ping_secret: "1bd1yi0x4lb5q4gzqqtrnq30oyj08r8dtdimmanbqw49z1anz2"
     slurm_state: "idle"
     total_cpu_cores: 16
+
+new_with_no_hostname:
+  uuid: zzzzz-7ekkf-newnohostname00
+  owner_uuid: zzzzz-tpzed-000000000000000
+  hostname: ~
+  slot_number: ~
+  ip_address: 172.17.2.173
+  last_ping_at: ~
+  first_ping_at: ~
+  job_uuid: ~
+  info:
+    ping_secret: "abcdyi0x4lb5q4gzqqtrnq30oyj08r8dtdimmanbqw49z1anz2"
+
+new_with_custom_hostname:
+  uuid: zzzzz-7ekkf-newwithhostname
+  owner_uuid: zzzzz-tpzed-000000000000000
+  hostname: custom1
+  slot_number: 23
+  ip_address: 172.17.2.173
+  last_ping_at: ~
+  first_ping_at: ~
+  job_uuid: ~
+  info:
+    ping_secret: "abcdyi0x4lb5q4gzqqtrnq30oyj08r8dtdimmanbqw49z1anz2"
index df010353a8db60223f882cdbe70c7cf6ba62c92e..41a7fc9720e77292721800004babca120ae45480 100644 (file)
@@ -254,6 +254,34 @@ pipeline_in_publicly_accessible_project:
   uuid: zzzzz-d1hrv-n68vc490mloy4fi
   owner_uuid: zzzzz-j7d0g-zhxawtyetzwc5f0
   name: Pipeline in publicly accessible project
+  pipeline_template_uuid: zzzzz-p5p6p-tmpltpublicproj
+  state: Complete
+  created_at: <%= 1.minute.ago.to_s(:db) %>
+  components:
+    foo:
+      script: foo
+      script_version: master
+      script_parameters:
+        input:
+          required: true
+          dataclass: Collection
+          title: foo instance input
+      job:
+        uuid: zzzzz-8i9sb-jyq01m7in1jlofj
+        repository: active/foo
+        script: foo
+        script_version: master
+        script_parameters:
+          input: zzzzz-4zz18-4en62shvi99lxd4
+        log: zzzzz-4zz18-4en62shvi99lxd4
+        output: b519d9cb706a29fc7ea24dbea2f05851+93
+        state: Complete
+
+pipeline_in_publicly_accessible_project_but_other_objects_elsewhere:
+  uuid: zzzzz-d1hrv-pisharednotobjs
+  owner_uuid: zzzzz-j7d0g-zhxawtyetzwc5f0
+  name: Pipeline in public project with other objects elsewhere
+  pipeline_template_uuid: zzzzz-p5p6p-aox0k0ofxrystgw
   state: Complete
   created_at: 2014-09-15 12:00:00
   components:
@@ -265,10 +293,70 @@ pipeline_in_publicly_accessible_project:
           required: true
           dataclass: Collection
           title: foo instance input
+      job:
+        uuid: zzzzz-8i9sb-aceg2bnq7jt7kon
+        repository: active/foo
+        script: foo
+        script_version: master
+        script_parameters:
+          input: zzzzz-4zz18-bv31uwvy3neko21
+        log: zzzzz-4zz18-bv31uwvy3neko21
+        output: zzzzz-4zz18-bv31uwvy3neko21
+        state: Complete
+
+new_pipeline_in_publicly_accessible_project:
+  uuid: zzzzz-d1hrv-newpisharedobjs
+  owner_uuid: zzzzz-j7d0g-zhxawtyetzwc5f0
+  name: Pipeline in New state in publicly accessible project
+  pipeline_template_uuid: zzzzz-p5p6p-tmpltpublicproj
+  state: New
+  created_at: 2014-09-15 12:00:00
+  components:
+    foo:
+      script: foo
+      script_version: master
+      script_parameters:
+        input:
+          required: true
+          dataclass: Collection
+          value: b519d9cb706a29fc7ea24dbea2f05851+93
+
+new_pipeline_in_publicly_accessible_project_but_other_objects_elsewhere:
+  uuid: zzzzz-d1hrv-newsharenotobjs
+  owner_uuid: zzzzz-j7d0g-zhxawtyetzwc5f0
+  name: Pipeline in New state in public project with objects elsewhere
+  pipeline_template_uuid: zzzzz-p5p6p-aox0k0ofxrystgw
+  state: New
+  created_at: 2014-09-15 12:00:00
+  components:
+    foo:
+      script: foo
+      script_version: master
+      script_parameters:
+        input:
+          required: true
+          dataclass: Collection
+          value: zzzzz-4zz18-bv31uwvy3neko21
+
+new_pipeline_in_publicly_accessible_project_with_dataclass_file_and_other_objects_elsewhere:
+  uuid: zzzzz-d1hrv-newsharenotfile
+  owner_uuid: zzzzz-j7d0g-zhxawtyetzwc5f0
+  name: Pipeline in public project in New state with file type data class with objects elsewhere
+  pipeline_template_uuid: zzzzz-p5p6p-aox0k0ofxrystgw
+  state: New
+  created_at: 2014-09-15 12:00:00
+  components:
+    foo:
+      script: foo
+      script_version: master
+      script_parameters:
+        input:
+          required: true
+          dataclass: File
+          value: zzzzz-4zz18-bv31uwvy3neko21/bar
 
 pipeline_in_running_state:
   name: running_with_job
-  state: Ready
   uuid: zzzzz-d1hrv-runningpipeline
   owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
   created_at: <%= 3.1.minute.ago.to_s(:db) %>
@@ -283,6 +371,47 @@ pipeline_in_running_state:
       uuid: zzzzz-8i9sb-pshmckwoma9plh7
       script_version: master
 
+running_pipeline_with_complete_job:
+  uuid: zzzzz-d1hrv-partdonepipelin
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  state: RunningOnServer
+  components:
+   previous:
+    job:
+      uuid: zzzzz-8i9sb-cjs4pklxxjykqqq
+      log: zzzzz-4zz18-op4e2lbej01tcvu
+   running:
+    job:
+      uuid: zzzzz-8i9sb-pshmckwoma9plh7
+
+complete_pipeline_with_two_jobs:
+  uuid: zzzzz-d1hrv-twodonepipeline
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  state: Complete
+  components:
+   ancient:
+    job:
+      uuid: zzzzz-8i9sb-ahd7cie8jah9qui
+      log: zzzzz-4zz18-op4e2lbej01tcvu
+   previous:
+    job:
+      uuid: zzzzz-8i9sb-cjs4pklxxjykqqq
+      log: zzzzz-4zz18-op4e2lbej01tcvu
+
+failed_pipeline_with_two_jobs:
+  uuid: zzzzz-d1hrv-twofailpipeline
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  state: Failed
+  components:
+   ancient:
+    job:
+      uuid: zzzzz-8i9sb-ahd7cie8jah9qui
+      log: zzzzz-4zz18-op4e2lbej01tcvu
+   previous:
+    job:
+      uuid: zzzzz-8i9sb-cjs4pklxxjykqqq
+      log: zzzzz-4zz18-op4e2lbej01tcvu
+
 # Test Helper trims the rest of the file
 
 # Do not add your fixtures below this line as the rest of this file will be trimmed by test_helper
index 40bf63dd7e2108b490841c940e12a7aa34253432..cbd82de9241101a72cc1c263903b9a403a8234fa 100644 (file)
@@ -203,3 +203,43 @@ pipeline_template_in_publicly_accessible_project:
           dataclass: Collection
           title: "default input"
           description: "input collection"
+
+# Used to test renaming when an object is removed from the "aproject" subproject
+# while another object with the same name exists in the home project.
+template_in_active_user_home_project_to_test_unique_key_violation:
+  uuid: zzzzz-p5p6p-templatsamenam1
+  owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  created_at: 2013-04-14 12:35:04 -0400
+  updated_at: 2013-04-14 12:35:04 -0400
+  modified_at: 2013-04-14 12:35:04 -0400
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  name: Template to test owner uuid and name unique key violation upon removal
+  components:
+    script: foo
+    script_version: master
+    script_parameters:
+      input:
+        required: true
+        dataclass: Collection
+        title: "Foo/bar pair"
+        description: "Provide a collection containing at least two files."
+
+template_in_asubproject_with_same_name_as_one_in_active_user_home:
+  uuid: zzzzz-p5p6p-templatsamenam2
+  owner_uuid: zzzzz-j7d0g-axqo7eu9pwvna1x
+  created_at: 2013-04-14 12:35:04 -0400
+  updated_at: 2013-04-14 12:35:04 -0400
+  modified_at: 2013-04-14 12:35:04 -0400
+  modified_by_client_uuid: zzzzz-ozdt8-brczlopd8u8d0jr
+  modified_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+  name: Template to test owner uuid and name unique key violation upon removal
+  components:
+    script: foo
+    script_version: master
+    script_parameters:
+      input:
+        required: true
+        dataclass: Collection
+        title: "Foo/bar pair"
+        description: "Provide a collection containing at least two files."
index a0e3b1f480f95aff5421d30fba7b3a446a03c568..a5aac1168b79e7818d026e2330a3f53363e38613 100644 (file)
@@ -1,34 +1,48 @@
 crunch_dispatch_test:
   uuid: zzzzz-s0uqq-382brsig8rp3665
   owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz # active user
-  name: crunch_dispatch_test
+  name: active/crunchdispatchtest
+  created_at: 2015-01-01T00:00:00.123456Z
+  modified_at: 2015-01-01T00:00:00.123456Z
 
 arvados:
   uuid: zzzzz-s0uqq-arvadosrepo0123
   owner_uuid: zzzzz-tpzed-000000000000000 # root
   name: arvados
+  created_at: 2015-01-01T00:00:00.123456Z
+  modified_at: 2015-01-01T00:00:00.123456Z
 
 foo:
   uuid: zzzzz-s0uqq-382brsig8rp3666
   owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz # active user
-  name: foo
+  name: active/foo
+  created_at: 2015-01-01T00:00:00.123456Z
+  modified_at: 2015-01-01T00:00:00.123456Z
 
 repository2:
   uuid: zzzzz-s0uqq-382brsig8rp3667
   owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz # active user
-  name: foo2
+  name: active/foo2
+  created_at: 2015-01-01T00:00:00.123456Z
+  modified_at: 2015-01-01T00:00:00.123456Z
 
 repository3:
   uuid: zzzzz-s0uqq-38orljkqpyo1j61
   owner_uuid: zzzzz-tpzed-d9tiejq69daie8f # admin user
-  name: foo3
+  name: admin/foo3
+  created_at: 2015-01-01T00:00:00.123456Z
+  modified_at: 2015-01-01T00:00:00.123456Z
 
 repository4:
   uuid: zzzzz-s0uqq-38oru8hnk57ht34
   owner_uuid: zzzzz-tpzed-d9tiejq69daie8f # admin user
-  name: foo4
+  name: admin/foo4
+  created_at: 2015-01-01T00:00:00.123456Z
+  modified_at: 2015-01-01T00:00:00.123456Z
 
-auto_setup_repository:
-  uuid: zzzzz-s0uqq-382brabc8rp3667
+has_branch_with_commit_hash_name:
+  uuid: zzzzz-s0uqq-382brsig8rp3668
   owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz # active user
-  name: auto_setup_repo
+  name: active/shabranchnames
+  created_at: 2015-01-01T00:00:00.123456Z
+  modified_at: 2015-01-01T00:00:00.123456Z
index c04aa47d2f34f779b8cdbaf3d0a196a499b00e27..db9607bc02476a5f3b5385665d9f02024178e3fc 100644 (file)
@@ -25,10 +25,12 @@ admin:
   identity_url: https://admin.openid.local
   is_active: true
   is_admin: true
+  username: admin
   prefs:
     profile:
       organization: example.com
       role: IT
+    getting_started_shown: 2015-03-26 12:34:56.789000000 Z
 
 miniadmin:
   owner_uuid: zzzzz-tpzed-000000000000000
@@ -39,10 +41,12 @@ miniadmin:
   identity_url: https://miniadmin.openid.local
   is_active: true
   is_admin: false
+  username: miniadmin
   prefs:
     profile:
       organization: example.com
       role: IT
+    getting_started_shown: 2015-03-26 12:34:56.789000000 Z
 
 rominiadmin:
   owner_uuid: zzzzz-tpzed-000000000000000
@@ -53,10 +57,12 @@ rominiadmin:
   identity_url: https://rominiadmin.openid.local
   is_active: true
   is_admin: false
+  username: rominiadmin
   prefs:
     profile:
       organization: example.com
       role: IT
+    getting_started_shown: 2015-03-26 12:34:56.789000000 Z
 
 active:
   owner_uuid: zzzzz-tpzed-000000000000000
@@ -67,10 +73,12 @@ active:
   identity_url: https://active-user.openid.local
   is_active: true
   is_admin: false
+  username: active
   prefs:
     profile:
       organization: example.com
       role: Computational biologist
+    getting_started_shown: 2015-03-26 12:34:56.789000000 Z
 
 project_viewer:
   owner_uuid: zzzzz-tpzed-000000000000000
@@ -81,10 +89,12 @@ project_viewer:
   identity_url: https://project-viewer.openid.local
   is_active: true
   is_admin: false
+  username: projectviewer
   prefs:
     profile:
       organization: example.com
       role: Computational biologist
+    getting_started_shown: 2015-03-26 12:34:56.789000000 Z
 
 future_project_user:
   # Workbench tests give this user permission on aproject.
@@ -96,10 +106,12 @@ future_project_user:
   identity_url: https://future-project-user.openid.local
   is_active: true
   is_admin: false
+  username: futureprojectviewer
   prefs:
     profile:
       organization: example.com
       role: Computational biologist
+    getting_started_shown: 2015-03-26 12:34:56.789000000 Z
 
 subproject_admin:
   owner_uuid: zzzzz-tpzed-000000000000000
@@ -110,10 +122,12 @@ subproject_admin:
   identity_url: https://subproject-admin.openid.local
   is_active: true
   is_admin: false
+  username: subprojectadmin
   prefs:
     profile:
       organization: example.com
       role: Computational biologist
+    getting_started_shown: 2015-03-26 12:34:56.789000000 Z
 
 spectator:
   owner_uuid: zzzzz-tpzed-000000000000000
@@ -124,10 +138,12 @@ spectator:
   identity_url: https://spectator.openid.local
   is_active: true
   is_admin: false
+  username: spectator
   prefs:
     profile:
       organization: example.com
       role: Computational biologist
+    getting_started_shown: 2015-03-26 12:34:56.789000000 Z
 
 inactive_uninvited:
   owner_uuid: zzzzz-tpzed-000000000000000
@@ -149,6 +165,7 @@ inactive:
   identity_url: https://inactive-user.openid.local
   is_active: false
   is_admin: false
+  username: inactiveuser
   prefs: {}
 
 inactive_but_signed_user_agreement:
@@ -164,6 +181,7 @@ inactive_but_signed_user_agreement:
     profile:
       organization: example.com
       role: Computational biologist
+    getting_started_shown: 2015-03-26 12:34:56.789000000 Z
 
 anonymous:
   owner_uuid: zzzzz-tpzed-000000000000000
@@ -184,10 +202,12 @@ job_reader:
   identity_url: https://spectator.openid.local
   is_active: true
   is_admin: false
+  username: jobber
   prefs:
     profile:
       organization: example.com
       role: Computational biologist
+    getting_started_shown: 2015-03-26 12:34:56.789000000 Z
 
 active_no_prefs:
   owner_uuid: zzzzz-tpzed-000000000000000
@@ -200,7 +220,7 @@ active_no_prefs:
   is_admin: false
   prefs: {}
 
-active_no_prefs_profile:
+active_no_prefs_profile_no_getting_started_shown:
   owner_uuid: zzzzz-tpzed-000000000000000
   uuid: zzzzz-tpzed-a46c98d1td4aoj4
   email: active_no_prefs_profile@arvados.local
@@ -212,6 +232,19 @@ active_no_prefs_profile:
   prefs:
     test: abc
 
+active_no_prefs_profile_with_getting_started_shown:
+  owner_uuid: zzzzz-tpzed-000000000000000
+  uuid: zzzzz-tpzed-getstartnoprofl
+  email: active_no_prefs_profile@arvados.local
+  first_name: HasPrefs
+  last_name: NoProfileWithGettingStartedShown
+  identity_url: https://active_no_prefs_profile_seen_gs.openid.local
+  is_active: true
+  is_admin: false
+  prefs:
+    test: abc
+    getting_started_shown: 2015-03-26 12:34:56.789000000 Z
+
 # Fixtures to test granting and removing permissions.
 
 user_foo_in_sharing_group:
@@ -223,6 +256,7 @@ user_foo_in_sharing_group:
   identity_url: https://user_foo_in_sharing_group.openid.local
   is_active: true
   is_admin: false
+  username: fooinsharing
 
 user_bar_in_sharing_group:
   owner_uuid: zzzzz-tpzed-000000000000000
@@ -233,6 +267,7 @@ user_bar_in_sharing_group:
   identity_url: https://user_bar_in_sharing_group.openid.local
   is_active: true
   is_admin: false
+  username: barinsharing
 
 user1_with_load:
   owner_uuid: zzzzz-tpzed-000000000000000
@@ -243,10 +278,12 @@ user1_with_load:
   identity_url: https://user1_with_load.openid.local
   is_active: true
   is_admin: false
+  username: user1withload
   prefs:
     profile:
       organization: example.com
       role: IT
+    getting_started_shown: 2015-03-26 12:34:56.789000000 Z
 
 fuse:
   owner_uuid: zzzzz-tpzed-000000000000000
@@ -257,7 +294,9 @@ fuse:
   identity_url: https://fuse.openid.local
   is_active: true
   is_admin: false
+  username: FUSE
   prefs:
     profile:
       organization: example.com
       role: IT
+    getting_started_shown: 2015-03-26 12:34:56.789000000 Z
index 54ffe66f174baf341ae19a00a58c71b578f9b3ce..a8583be12bb70d915585c8c48aba0bc06aa32d3e 100644 (file)
@@ -91,6 +91,71 @@ class Arvados::V1::CollectionsControllerTest < ActionController::TestCase
     assert_equal 99999, resp['offset']
   end
 
+  def request_capped_index(params={})
+    authorize_with :user1_with_load
+    coll1 = collections(:collection_1_of_201)
+    Rails.configuration.max_index_database_read =
+      yield(coll1.manifest_text.size)
+    get :index, {
+      select: %w(uuid manifest_text),
+      filters: [["owner_uuid", "=", coll1.owner_uuid]],
+      limit: 300,
+    }.merge(params)
+  end
+
+  test "index with manifest_text limited by max_index_database_read returns non-empty" do
+    request_capped_index() { |_| 1 }
+    assert_response :success
+    assert_equal(1, json_response["items"].size)
+    assert_equal(1, json_response["limit"])
+    assert_equal(201, json_response["items_available"])
+  end
+
+  test "max_index_database_read size check follows same order as real query" do
+    authorize_with :user1_with_load
+    txt = '.' + ' d41d8cd98f00b204e9800998ecf8427e+0'*1000 + " 0:0:empty.txt\n"
+    c = Collection.create! manifest_text: txt, name: '0000000000000000000'
+    request_capped_index(select: %w(uuid manifest_text name),
+                         order: ['name asc'],
+                         filters: [['name','>=',c.name]]) do |_|
+      txt.length - 1
+    end
+    assert_response :success
+    assert_equal(1, json_response["items"].size)
+    assert_equal(1, json_response["limit"])
+    assert_equal(c.uuid, json_response["items"][0]["uuid"])
+    # The effectiveness of the test depends on >1 item matching the filters.
+    assert_operator(1, :<, json_response["items_available"])
+  end
+
+  test "index with manifest_text limited by max_index_database_read" do
+    request_capped_index() { |size| (size * 3) + 1 }
+    assert_response :success
+    assert_equal(3, json_response["items"].size)
+    assert_equal(3, json_response["limit"])
+    assert_equal(201, json_response["items_available"])
+  end
+
+  test "max_index_database_read does not interfere with limit" do
+    request_capped_index(limit: 5) { |size| size * 20 }
+    assert_response :success
+    assert_equal(5, json_response["items"].size)
+    assert_equal(5, json_response["limit"])
+    assert_equal(201, json_response["items_available"])
+  end
+
+  test "max_index_database_read does not interfere with order" do
+    request_capped_index(select: %w(uuid manifest_text name),
+                         order: "name DESC") { |size| (size * 11) + 1 }
+    assert_response :success
+    assert_equal(11, json_response["items"].size)
+    assert_empty(json_response["items"].reject do |coll|
+                   coll["name"] =~ /^Collection_9/
+                 end)
+    assert_equal(11, json_response["limit"])
+    assert_equal(201, json_response["items_available"])
+  end
+
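These tests pin down the capping behavior rather than its implementation. A
minimal sketch of the rule they imply, assuming a byte budget applied in
result order (the helper name is hypothetical):

    # Sketch: keep items while cumulative manifest_text bytes fit the budget,
    # but always return at least one item, matching the assertions above.
    # The response's "limit" field then reflects the capped count.
    def cap_by_database_read(items, max_bytes)
      read = 0
      capped = []
      items.each do |item|
        size = item[:manifest_text].to_s.size
        break if capped.any? && read + size > max_bytes
        read += size
        capped << item
      end
      capped
    end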
   test "admin can create collection with unsigned manifest" do
     authorize_with :admin
     test_collection = {
@@ -466,8 +531,8 @@ EOS
     }
 
     # Generate a locator with a bad signature.
-    unsigned_locator = "d41d8cd98f00b204e9800998ecf8427e+0"
-    bad_locator = unsigned_locator + "+Affffffff@ffffffff"
+    unsigned_locator = "acbd18db4cc2f85cedef654fccc4a4d8+3"
+    bad_locator = unsigned_locator + "+Affffffffffffffffffffffffffffffffffffffff@ffffffff"
     assert !Blob.verify_signature(bad_locator, signing_opts)
 
     # Creating a collection with this locator should
@@ -512,6 +577,16 @@ EOS
     assert_response 422
   end
 
+  test "reject manifest with unsigned block as stream name" do
+    authorize_with :active
+    post :create, {
+      collection: {
+        manifest_text: "00000000000000000000000000000000+1234 d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt\n"
+      }
+    }
+    assert_includes [422, 403], response.code.to_i
+  end
+
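For reference, a short sketch of the signed-locator shape these assertions
construct by hand (the signature and timestamp bytes below are placeholders,
not a real signature):

    # Sketch: anatomy of a signed Keep locator.
    unsigned = "acbd18db4cc2f85cedef654fccc4a4d8+3"  # MD5 of "foo", size 3
    signed = unsigned + "+A" + ("f" * 40) + "@" + "ffffffff"
    # => <md5>+<size>+A<40-hex signature>@<8-hex expiry timestamp>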
   test "multiple locators per line" do
     permit_unsigned_manifests
     authorize_with :active
@@ -734,4 +809,82 @@ EOS
     assert_not_nil json_response['uuid']
     assert_equal 'value_1', json_response['properties']['property_1']
   end
+
+  [
+    ". 0:0:foo.txt",
+    ". d41d8cd98f00b204e9800998ecf8427e foo.txt",
+    "d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt",
+    ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt",
+  ].each do |manifest_text|
+    test "create collection with invalid manifest #{manifest_text} and expect error" do
+      authorize_with :active
+      post :create, {
+        collection: {
+          manifest_text: manifest_text,
+          portable_data_hash: "d41d8cd98f00b204e9800998ecf8427e+0"
+        }
+      }
+      assert_response 422
+      response_errors = json_response['errors']
+      assert_not_nil response_errors, 'Expected error in response'
+      assert(response_errors.first.include?('Invalid manifest'),
+             "Expected 'Invalid manifest' error in #{response_errors.first}")
+    end
+  end
+
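The invalid cases above all break the same line grammar: a stream name, at
least one hash+size block locator, then position:length:filename tokens. A
simplified sketch of that shape (much looser than the real validator):

    # Sketch: rough shape of one manifest line.
    LINE_RE = %r{\A\.(?:/\S+)? (?:[0-9a-f]{32}\+\d+\S* )+(?:\d+:\d+:\S+ ?)+\z}
    LINE_RE =~ ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:foo.txt"  # => 0 (match)
    LINE_RE =~ ". 0:0:foo.txt"                                     # => nil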
+  [
+    [nil, "d41d8cd98f00b204e9800998ecf8427e+0"],
+    ["", "d41d8cd98f00b204e9800998ecf8427e+0"],
+    [". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt\n", "d30fe8ae534397864cb96c544f4cf102+47"],
+  ].each do |manifest_text, pdh|
+    test "create collection with valid manifest #{manifest_text.inspect} and expect success" do
+      authorize_with :active
+      post :create, {
+        collection: {
+          manifest_text: manifest_text,
+          portable_data_hash: pdh
+        }
+      }
+      assert_response 200
+    end
+  end
+
+  [
+    ". 0:0:foo.txt",
+    ". d41d8cd98f00b204e9800998ecf8427e foo.txt",
+    "d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt",
+    ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt",
+  ].each do |manifest_text|
+    test "update collection with invalid manifest #{manifest_text} and expect error" do
+      authorize_with :active
+      post :update, {
+        id: 'zzzzz-4zz18-bv31uwvy3neko21',
+        collection: {
+          manifest_text: manifest_text,
+        }
+      }
+      assert_response 422
+      response_errors = json_response['errors']
+      assert_not_nil response_errors, 'Expected error in response'
+      assert(response_errors.first.include?('Invalid manifest'),
+             "Expected 'Invalid manifest' error in #{response_errors.first}")
+    end
+  end
+
+  [
+    nil,
+    "",
+    ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt\n",
+  ].each do |manifest_text|
+    test "update collection with valid manifest #{manifest_text.inspect} and expect success" do
+      authorize_with :active
+      post :update, {
+        id: 'zzzzz-4zz18-bv31uwvy3neko21',
+        collection: {
+          manifest_text: manifest_text,
+        }
+      }
+      assert_response 200
+    end
+  end
 end
index ceaebffb2305b2255b556c7c28519606f384910a..4af1c6eaa8b06fa803aa3f05d9e7cd6bb3058b59 100644 (file)
@@ -1,102 +1,4 @@
 require 'test_helper'
-require 'helpers/git_test_helper'
-
-# NOTE: calling Commit.find_commit_range(user, nil, nil, 'rev') will produce
-# an error message "fatal: bad object 'rev'" on stderr if 'rev' does not exist
-# in a given repository.  Many of these tests report such errors; their presence
-# does not represent a fatal condition.
-#
-# TODO(twp): consider better error handling of these messages, or
-# decide to abandon it.
 
 class Arvados::V1::CommitsControllerTest < ActionController::TestCase
-  fixtures :repositories, :users
-
-  # See git_setup.rb for the commit log for test.git.tar
-  include GitTestHelper
-
-  test "test_find_commit_range" do
-    authorize_with :active
-
-  # single
-    a = Commit.find_commit_range(users(:active), nil, nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
-    assert_equal ['31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
-
-  #test "test_branch1" do
-    # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
-    a = Commit.find_commit_range(users(:active), nil, nil, 'master', nil)
-    assert_includes(a, 'f35f99b7d32bac257f5989df02b9f12ee1a9b0d6')
-    assert_includes(a, '077ba2ad3ea24a929091a9e6ce545c93199b8e57')
-
-  #test "test_branch2" do
-    a = Commit.find_commit_range(users(:active), 'foo', nil, 'b1', nil)
-    assert_equal ['1de84a854e2b440dc53bf42f8548afa4c17da332'], a
-
-  #test "test_branch3" do
-    a = Commit.find_commit_range(users(:active), 'foo', nil, 'HEAD', nil)
-    assert_equal ['1de84a854e2b440dc53bf42f8548afa4c17da332'], a
-
-  #test "test_single_revision_repo" do
-    a = Commit.find_commit_range(users(:active), "foo", nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
-    assert_equal ['31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
-    a = Commit.find_commit_range(users(:active), "bar", nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
-    assert_equal nil, a
-
-  #test "test_multi_revision" do
-    # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
-    a = Commit.find_commit_range(users(:active), nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', nil)
-    assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '4fe459abe02d9b365932b8f5dc419439ab4e2577', '31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
-
-  #test "test_tag" do
-    # complains "fatal: ambiguous argument 'tag1': unknown revision or path
-    # not in the working tree."
-    a = Commit.find_commit_range(users(:active), nil, 'tag1', 'master', nil)
-    assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '4fe459abe02d9b365932b8f5dc419439ab4e2577'], a
-
-  #test "test_multi_revision_exclude" do
-    a = Commit.find_commit_range(users(:active), nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', ['4fe459abe02d9b365932b8f5dc419439ab4e2577'])
-    assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
-
-  #test "test_multi_revision_tagged_exclude" do
-    # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
-    a = Commit.find_commit_range(users(:active), nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', ['tag1'])
-    assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
-
-    Dir.mktmpdir do |touchdir|
-      # invalid input to maximum
-      a = Commit.find_commit_range(users(:active), nil, nil, "31ce37fe365b3dc204300a3e4c396ad333ed0556 ; touch #{touchdir}/uh_oh", nil)
-      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'maximum' parameter of find_commit_range is exploitable"
-      assert_equal nil, a
-
-      # invalid input to maximum
-      a = Commit.find_commit_range(users(:active), nil, nil, "$(uname>#{touchdir}/uh_oh)", nil)
-      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'maximum' parameter of find_commit_range is exploitable"
-      assert_equal nil, a
-
-      # invalid input to minimum
-      a = Commit.find_commit_range(users(:active), nil, "31ce37fe365b3dc204300a3e4c396ad333ed0556 ; touch #{touchdir}/uh_oh", "31ce37fe365b3dc204300a3e4c396ad333ed0556", nil)
-      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'minimum' parameter of find_commit_range is exploitable"
-      assert_equal nil, a
-
-      # invalid input to minimum
-      a = Commit.find_commit_range(users(:active), nil, "$(uname>#{touchdir}/uh_oh)", "31ce37fe365b3dc204300a3e4c396ad333ed0556", nil)
-      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'minimum' parameter of find_commit_range is exploitable"
-      assert_equal nil, a
-
-      # invalid input to 'excludes'
-      # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
-      a = Commit.find_commit_range(users(:active), nil, "31ce37fe365b3dc204300a3e4c396ad333ed0556", "077ba2ad3ea24a929091a9e6ce545c93199b8e57", ["4fe459abe02d9b365932b8f5dc419439ab4e2577 ; touch #{touchdir}/uh_oh"])
-      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'excludes' parameter of find_commit_range is exploitable"
-      assert_equal nil, a
-
-      # invalid input to 'excludes'
-      # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
-      a = Commit.find_commit_range(users(:active), nil, "31ce37fe365b3dc204300a3e4c396ad333ed0556", "077ba2ad3ea24a929091a9e6ce545c93199b8e57", ["$(uname>#{touchdir}/uh_oh)"])
-      assert !File.exists?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'excludes' parameter of find_commit_range is exploitable"
-      assert_equal nil, a
-
-    end
-
-  end
-
 end
index 9b66851d7e0dc8885876b0b7b179c7e063782ed8..64d559107c19257ca7a954d323f42cba60c7a9c2 100644 (file)
@@ -17,7 +17,7 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
       no_reuse: false,
       script: "hash",
       script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-      repository: "foo",
+      repository: "active/foo",
       script_parameters: {
         input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
         an_integer: '1'
@@ -35,7 +35,7 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
       job: {
         script: "hash",
         script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-        repository: "foo",
+        repository: "active/foo",
         script_parameters: {
           input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
           an_integer: '1'
@@ -55,7 +55,7 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
       job: {
         script: "hash",
         script_version: "tag1",
-        repository: "foo",
+        repository: "active/foo",
         script_parameters: {
           input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
           an_integer: '1'
@@ -76,7 +76,7 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
         no_reuse: true,
         script: "hash",
         script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-        repository: "foo",
+        repository: "active/foo",
         script_parameters: {
           input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
           an_integer: '1'
@@ -96,7 +96,7 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
         job: {
           script: "hash",
           script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-          repository: "foo",
+          repository: "active/foo",
           script_parameters: {
             input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
             an_integer: '1'
@@ -118,7 +118,7 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
       job: {
         script: "hash",
         script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-        repository: "foo",
+        repository: "active/foo",
         script_parameters: {
           input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
           an_integer: '1'
@@ -138,7 +138,7 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
       no_reuse: false,
       script: "hash",
       script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-      repository: "foo",
+      repository: "active/foo",
       script_parameters: {
         input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
         an_integer: '2'
@@ -156,7 +156,7 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
       script: "hash",
       minimum_script_version: "tag1",
       script_version: "master",
-      repository: "foo",
+      repository: "active/foo",
       script_parameters: {
         input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
         an_integer: '1'
@@ -174,7 +174,7 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
       no_reuse: false,
       script: "hash",
       script_version: "master",
-      repository: "foo",
+      repository: "active/foo",
       script_parameters: {
         input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
         an_integer: '1'
@@ -192,7 +192,7 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
       no_reuse: false,
       script: "hash",
       script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-      repository: "foo",
+      repository: "active/foo",
       script_parameters: {
         input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
         an_integer: '2'
@@ -210,7 +210,7 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
       no_reuse: false,
       script: "hash",
       script_version: "master",
-      repository: "foo",
+      repository: "active/foo",
       script_parameters: {
         input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
         an_integer: '2'
@@ -228,7 +228,7 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
       no_reuse: false,
       script: "hash",
       script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-      repository: "foo",
+      repository: "active/foo",
       script_parameters: {
         input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
         an_integer: '1'
@@ -247,7 +247,7 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
       no_reuse: false,
       script: "hash2",
       script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-      repository: "foo",
+      repository: "active/foo",
       script_parameters: {
         input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
         an_integer: '1'
@@ -266,7 +266,7 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
       no_reuse: false,
       script: "hash",
       script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-      repository: "foo",
+      repository: "active/foo",
       script_parameters: {
         input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
         an_integer: '1'
@@ -285,7 +285,7 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
       script: "hash",
       minimum_script_version: "31ce37fe365b3dc204300a3e4c396ad333ed0556",
       script_version: "master",
-      repository: "foo",
+      repository: "active/foo",
       exclude_script_versions: ["tag1"],
       script_parameters: {
         input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
@@ -305,7 +305,7 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
       job: {
         script: "hash",
         script_version: "master",
-        repository: "foo",
+        repository: "active/foo",
         script_parameters: {
           input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
           an_integer: '1'
@@ -323,8 +323,13 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
                      new_job['script_version'])
   end
 
+  test "cannot reuse job when hash-like branch includes newer commit" do
+    check_new_job_created_from({job: {script_version: "738783"}},
+                               :previous_job_run_superseded_by_hash_branch)
+  end
+
   BASE_FILTERS = {
-    'repository' => ['=', 'foo'],
+    'repository' => ['=', 'active/foo'],
     'script' => ['=', 'hash'],
     'script_version' => ['in git', 'master'],
     'docker_image_locator' => ['=', nil],
@@ -342,7 +347,7 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
            job: {
              script: "hash",
              script_version: "master",
-             repository: "foo",
+             repository: "active/foo",
              script_parameters: {
                input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
                an_integer: '1'
@@ -368,7 +373,7 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
            job: {
              script: "hash",
              script_version: "master",
-             repository: "foo",
+             repository: "active/foo",
              script_parameters: {
                input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
                an_integer: '1'
@@ -391,7 +396,7 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
            job: {
              script: "hash",
              script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-             repository: "foo",
+             repository: "active/foo",
              script_parameters: {
                input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
                an_integer: '1'
@@ -412,7 +417,7 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
            job: {
              script: "hash",
              script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-             repository: "foo",
+             repository: "active/foo",
              script_parameters: {
                input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
                an_integer: '1'
@@ -442,7 +447,7 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
            job: {
              script: "hash",
              script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-             repository: "foo",
+             repository: "active/foo",
              script_parameters: {
                input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
                an_integer: '1'
@@ -470,7 +475,7 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
            job: {
              script: "hash",
              script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-             repository: "foo",
+             repository: "active/foo",
              script_parameters: {
                input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
                an_integer: '1'
@@ -495,7 +500,7 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
            job: {
              script: "hash",
              script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-             repository: "foo",
+             repository: "active/foo",
              script_parameters: {
                input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
                an_integer: '1'
@@ -510,6 +515,21 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
     assert_not_equal(jobs(:previous_docker_job_run).uuid, new_job.uuid)
   end
 
+  test "don't reuse job using older Docker image of same name" do
+    jobspec = {runtime_constraints: {
+        docker_image: "arvados/apitestfixture",
+      }}
+    check_new_job_created_from({job: jobspec},
+                               :previous_ancient_docker_image_job_run)
+  end
+
+  test "reuse job with Docker image that has hash name" do
+    jobspec = {runtime_constraints: {
+        docker_image: "a" * 64,
+      }}
+    check_job_reused_from(jobspec, :previous_docker_job_run)
+  end
+
   ["repository", "script"].each do |skip_key|
     test "missing #{skip_key} filter raises an error" do
       filters = filters_from_hash(BASE_FILTERS.reject { |k| k == skip_key })
@@ -517,7 +537,7 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
              job: {
                script: "hash",
                script_version: "master",
-               repository: "foo",
+               repository: "active/foo",
                script_parameters: {
                  input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
                  an_integer: '1'
@@ -532,7 +552,7 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
   end
 
   test "find Job with script version range" do
-    get :index, filters: [["repository", "=", "foo"],
+    get :index, filters: [["repository", "=", "active/foo"],
                           ["script", "=", "hash"],
                           ["script_version", "in git", "tag1"]]
     assert_response :success
@@ -542,7 +562,7 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
   end
 
   test "find Job with script version range exclusions" do
-    get :index, filters: [["repository", "=", "foo"],
+    get :index, filters: [["repository", "=", "active/foo"],
                           ["script", "=", "hash"],
                           ["script_version", "not in git", "tag1"]]
     assert_response :success
@@ -599,35 +619,52 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
                     jobs(:previous_docker_job_run).uuid)
   end
 
-  def create_foo_hash_job_params(params)
+  JOB_SUBMIT_KEYS = [:script, :script_parameters, :script_version, :repository]
+  DEFAULT_START_JOB = :previous_job_run
+
+  def create_job_params(params, start_from=DEFAULT_START_JOB)
     if not params.has_key?(:find_or_create)
       params[:find_or_create] = true
     end
     job_attrs = params.delete(:job) || {}
-    params[:job] = {
-      script: "hash",
-      script_version: "4fe459abe02d9b365932b8f5dc419439ab4e2577",
-      repository: "foo",
-      script_parameters: {
-        input: 'fa7aeb5140e2848d39b416daeef4ffc5+45',
-        an_integer: '1',
-      },
-    }.merge(job_attrs)
+    start_job = jobs(start_from)
+    params[:job] = Hash[JOB_SUBMIT_KEYS.map do |key|
+                          [key, start_job.send(key)]
+                        end]
+    params[:job][:runtime_constraints] =
+      job_attrs.delete(:runtime_constraints) || {}
+    { arvados_sdk_version: :arvados_sdk_version,
+      docker_image_locator: :docker_image }.each do |method, constraint_key|
+      if constraint_value = start_job.send(method)
+        params[:job][:runtime_constraints][constraint_key] ||= constraint_value
+      end
+    end
+    params[:job].merge!(job_attrs)
     params
   end
 
-  def check_new_job_created_from(params)
-    start_time = Time.now
-    post(:create, create_foo_hash_job_params(params))
+  def create_job_from(params, start_from)
+    post(:create, create_job_params(params, start_from))
     assert_response :success
     new_job = assigns(:object)
     assert_not_nil new_job
+    new_job
+  end
+
+  def check_new_job_created_from(params, start_from=DEFAULT_START_JOB)
+    start_time = Time.now
+    new_job = create_job_from(params, start_from)
     assert_operator(start_time, :<=, new_job.created_at)
     new_job
   end
 
-  def check_errors_from(params)
-    post(:create, create_foo_hash_job_params(params))
+  def check_job_reused_from(params, start_from)
+    new_job = create_job_from(params, start_from)
+    assert_equal(jobs(start_from).uuid, new_job.uuid)
+  end
+
+  def check_errors_from(params, start_from=DEFAULT_START_JOB)
+    post(:create, create_job_params(params, start_from))
     assert_includes(405..499, @response.code.to_i)
     errors = json_response.fetch("errors", [])
     assert(errors.any?, "no errors assigned from #{params}")
@@ -670,27 +707,40 @@ class Arvados::V1::JobReuseControllerTest < ActionController::TestCase
            "bad refspec not mentioned in error message")
   end
 
-  test "can't reuse job with older Arvados SDK version" do
-    params = {
-      script_version: "31ce37fe365b3dc204300a3e4c396ad333ed0556",
-      runtime_constraints: {
-        "arvados_sdk_version" => "master",
-        "docker_image" => links(:docker_image_collection_tag).name,
-      },
-    }
-    check_new_job_created_from(job: params)
+  test "don't reuse job with older Arvados SDK version specified by branch" do
+    jobspec = {runtime_constraints: {
+        arvados_sdk_version: "master",
+      }}
+    check_new_job_created_from({job: jobspec},
+                               :previous_job_run_with_arvados_sdk_version)
+  end
+
+  test "don't reuse job with older Arvados SDK version specified by commit" do
+    jobspec = {runtime_constraints: {
+        arvados_sdk_version: "ca68b24e51992e790f29df5cc4bc54ce1da4a1c2",
+      }}
+    check_new_job_created_from({job: jobspec},
+                               :previous_job_run_with_arvados_sdk_version)
+  end
+
+  test "don't reuse job with newer Arvados SDK version specified by commit" do
+    jobspec = {runtime_constraints: {
+        arvados_sdk_version: "436637c87a1d2bdbf4b624008304064b6cf0e30c",
+      }}
+    check_new_job_created_from({job: jobspec},
+                               :previous_job_run_with_arvados_sdk_version)
   end
 
   test "reuse job from arvados_sdk_version git filters" do
+    prev_job = jobs(:previous_job_run_with_arvados_sdk_version)
     filters_hash = BASE_FILTERS.
-      merge("arvados_sdk_version" => ["in git", "commit2"])
+      merge("arvados_sdk_version" => ["in git", "commit2"],
+            "docker_image_locator" => ["=", prev_job.docker_image_locator])
     filters_hash.delete("script_version")
-    params = create_foo_hash_job_params(filters:
-                                        filters_from_hash(filters_hash))
+    params = create_job_params(filters: filters_from_hash(filters_hash))
     post(:create, params)
     assert_response :success
-    assert_equal(jobs(:previous_job_run_with_arvados_sdk_version).uuid,
-                 assigns(:object).uuid)
+    assert_equal(prev_job.uuid, assigns(:object).uuid)
   end
 
   test "create new job because of arvados_sdk_version 'not in git' filters" do
index 07e7f840a1bafedcd456d2d674e1ad243b59ec0a..1e1425e92b7d27057e89a335c2480b8024b0c444 100644 (file)
@@ -10,7 +10,7 @@ class Arvados::V1::JobsControllerTest < ActionController::TestCase
     post :create, job: {
       script: "hash",
       script_version: "master",
-      repository: "foo",
+      repository: "active/foo",
       script_parameters: {}
     }
     assert_response :success
@@ -27,7 +27,7 @@ class Arvados::V1::JobsControllerTest < ActionController::TestCase
       script: "hash",
       script_version: "master",
       script_parameters: {},
-      repository: "foo",
+      repository: "active/foo",
       started_at: Time.now,
       finished_at: Time.now,
       running: false,
@@ -392,4 +392,45 @@ class Arvados::V1::JobsControllerTest < ActionController::TestCase
     post :lock, {id: jobs(:running).uuid}
     assert_response 403 # forbidden
   end
+
+  test 'reject invalid commit in remote repository' do
+    authorize_with :active
+    url = "http://localhost:1/fake/fake.git"
+    fetch_remote_from_local_repo url, :foo
+    post :create, job: {
+      script: "hash",
+      script_version: "abc123",
+      repository: url,
+      script_parameters: {}
+    }
+    assert_response 422
+  end
+
+  test 'tag remote commit in internal repository' do
+    authorize_with :active
+    url = "http://localhost:1/fake/fake.git"
+    fetch_remote_from_local_repo url, :foo
+    post :create, job: {
+      script: "hash",
+      script_version: "master",
+      repository: url,
+      script_parameters: {}
+    }
+    assert_response :success
+    assert_equal('077ba2ad3ea24a929091a9e6ce545c93199b8e57',
+                 internal_tag(json_response['uuid']))
+  end
+
+  test 'tag local commit in internal repository' do
+    authorize_with :active
+    post :create, job: {
+      script: "hash",
+      script_version: "master",
+      repository: "active/foo",
+      script_parameters: {}
+    }
+    assert_response :success
+    assert_equal('077ba2ad3ea24a929091a9e6ce545c93199b8e57',
+                 internal_tag(json_response['uuid']))
+  end
 end
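The two tagging tests above rely on the server copying any successfully resolved commit into a cluster-internal bare repository, where internal_tag (defined in the GitTestHelper changes later in this diff) reads it back. internal_tag boils down to a one-line git query, roughly:

    # Resolve a tag in a bare repo to its commit hash -- a sketch of
    # what internal_tag does; see the helper further down this diff.
    def resolve_tag(git_dir, tag)
      IO.popen(["git", "--git-dir", git_dir, "log",
                "--format=format:%H", "-n1", tag]) { |io| io.read }
    end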
index 9bf1b0bab1ccdf37f7e5fe4ec63192ef5f475b76..1345701b43e8a7666a7c634d8f73c7762ad59467 100644 (file)
@@ -122,7 +122,7 @@ class Arvados::V1::LinksControllerTest < ActionController::TestCase
       link_class: 'test',
       name: 'stuff',
       head_uuid: users(:active).uuid,
-      tail_uuid: virtual_machines(:testvm2).uuid
+      tail_uuid: authorized_keys(:admin).uuid,
     }
     authorize_with :active
     post :create, link: link
index 7ea231eecb9f28a35d8ccf67db2727f33446ff84..d2f56699ed0c0a858b3d296bd1a799dd605fa4a0 100644 (file)
@@ -173,4 +173,13 @@ class Arvados::V1::NodesControllerTest < ActionController::TestCase
     assert_equal(jobs(:nearly_finished_job).uuid, json_response["job_uuid"],
                  "mismatched job UUID after ping update")
   end
+
+  test "node should fail ping with invalid hostname config format" do
+    Rails.configuration.assign_node_hostname = 'compute%<slot_number>04'  # should end with "04d"
+    post :ping, {
+      id: nodes(:new_with_no_hostname).uuid,
+      ping_secret: nodes(:new_with_no_hostname).info['ping_secret'],
+    }
+    assert_response 422
+  end
 end
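For context on the ping test above: assign_node_hostname is treated as a Ruby format string with a named field, so a value ending in "04" instead of "04d" has no conversion character and cannot be rendered, hence the 422. The well-formed version behaves like this in plain Ruby:

    format('compute%<slot_number>04d', slot_number: 9)  # => "compute0009"
    # The broken value from the test raises ArgumentError (malformed
    # format string) when formatted, which the controller maps to 422.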
index 5304bcafc531bc1138c8c17071fd2793d71aff1c..514bb66bb2b55eaabfffd9e2494c59500c1a58bc 100644 (file)
@@ -42,6 +42,26 @@ class Arvados::V1::RepositoriesControllerTest < ActionController::TestCase
     end
   end
 
+  test "get_all_permissions takes into account is_active flag" do
+    r = nil
+    act_as_user users(:active) do
+      r = Repository.create! name: 'active/testrepo'
+    end
+    act_as_system_user do
+      u = users(:active)
+      u.is_active = false
+      u.save!
+    end
+    authorize_with :admin
+    get :get_all_permissions
+    assert_response :success
+    json_response['repositories'].each do |repo|
+      repo['user_permissions'].each do |user_uuid, perms|
+        refute_equal user_uuid, users(:active).uuid
+      end
+    end
+  end
+
   test "get_all_permissions does not give any access to user without permission" do
     viewer_uuid = users(:project_viewer).uuid
     assert_equal(authorized_keys(:project_viewer).authorized_user_uuid,
@@ -87,4 +107,135 @@ class Arvados::V1::RepositoriesControllerTest < ActionController::TestCase
                    "response public_key does not match fixture #{u}.")
     end
   end
+
+  test "get_all_permissions lists all repos regardless of permissions" do
+    act_as_system_user do
+      # Create repos that could potentially be left out of the
+      # permission list by accident.
+
+      # No authorized_key, no username (this can't even be done
+      # without skipping validations)
+      r = Repository.create name: 'root/testrepo'
+      assert r.save validate: false
+
+      r = Repository.create name: 'invalid username / repo name', owner_uuid: users(:inactive).uuid
+      assert r.save validate: false
+    end
+    authorize_with :admin
+    get :get_all_permissions
+    assert_response :success
+    assert_equal(Repository.count, json_response["repositories"].size)
+  end
+
+  test "get_all_permissions lists user permissions for users with no authorized keys" do
+    authorize_with :admin
+    AuthorizedKey.destroy_all
+    get :get_all_permissions
+    assert_response :success
+    assert_equal(Repository.count, json_response["repositories"].size)
+    repos_with_perms = []
+    json_response['repositories'].each do |repo|
+      if repo['user_permissions'].any?
+        repos_with_perms << repo['uuid']
+      end
+    end
+    assert_not_empty repos_with_perms, 'permissions are missing'
+  end
+
+  # Ensure get_all_permissions correctly describes what the normal
+  # permission system would do.
+  test "get_all_permissions obeys group permissions" do
+    act_as_user system_user do
+      r = Repository.create!(name: 'admin/groupcanwrite', owner_uuid: users(:admin).uuid)
+      g = Group.create!(group_class: 'group', name: 'repo-writers')
+      u1 = users(:active)
+      u2 = users(:spectator)
+      Link.create!(tail_uuid: g.uuid, head_uuid: r.uuid, link_class: 'permission', name: 'can_manage')
+      Link.create!(tail_uuid: u1.uuid, head_uuid: g.uuid, link_class: 'permission', name: 'can_write')
+      Link.create!(tail_uuid: u2.uuid, head_uuid: g.uuid, link_class: 'permission', name: 'can_read')
+
+      r = Repository.create!(name: 'admin/groupreadonly', owner_uuid: users(:admin).uuid)
+      g = Group.create!(group_class: 'group', name: 'repo-readers')
+      u1 = users(:active)
+      u2 = users(:spectator)
+      Link.create!(tail_uuid: g.uuid, head_uuid: r.uuid, link_class: 'permission', name: 'can_read')
+      Link.create!(tail_uuid: u1.uuid, head_uuid: g.uuid, link_class: 'permission', name: 'can_write')
+      Link.create!(tail_uuid: u2.uuid, head_uuid: g.uuid, link_class: 'permission', name: 'can_read')
+    end
+    authorize_with :admin
+    get :get_all_permissions
+    assert_response :success
+    json_response['repositories'].each do |repo|
+      repo['user_permissions'].each do |user_uuid, perms|
+        u = User.find_by_uuid(user_uuid)
+        if perms['can_read']
+          assert u.can? read: repo['uuid']
+          assert_match /R/, perms['gitolite_permissions']
+        else
+          refute_match /R/, perms['gitolite_permissions']
+        end
+        if perms['can_write']
+          assert u.can? write: repo['uuid']
+          assert_match /RW/, perms['gitolite_permissions']
+        else
+          refute_match /W/, perms['gitolite_permissions']
+        end
+        if perms['can_manage']
+          assert u.can? manage: repo['uuid']
+          assert_match /RW/, perms['gitolite_permissions']
+        end
+      end
+    end
+  end
+
+  test "default index includes fetch_url" do
+    authorize_with :active
+    get(:index)
+    assert_response :success
+    assert_includes(json_response["items"].map { |r| r["fetch_url"] },
+                    "git@git.zzzzz.arvadosapi.com:active/foo.git")
+  end
+
+  [
+    {cfg: :git_repo_ssh_base, cfgval: "git@example.com:", match: %r"^git@example.com:/"},
+    {cfg: :git_repo_ssh_base, cfgval: true, match: %r"^git@git.zzzzz.arvadosapi.com:/"},
+    {cfg: :git_repo_ssh_base, cfgval: false, refute: /^git@/ },
+    {cfg: :git_repo_https_base, cfgval: "https://example.com/", match: %r"https://example.com/"},
+    {cfg: :git_repo_https_base, cfgval: true, match: %r"^https://git.zzzzz.arvadosapi.com/"},
+    {cfg: :git_repo_https_base, cfgval: false, refute: /^http/ },
+  ].each do |expect|
+    test "set #{expect[:cfg]} to #{expect[:cfgval]}" do
+      Rails.configuration.send expect[:cfg].to_s+"=", expect[:cfgval]
+      authorize_with :active
+      get :index
+      assert_response :success
+      json_response['items'].each do |r|
+        if expect[:refute]
+          r['clone_urls'].each do |u|
+            refute_match expect[:refute], u
+          end
+        else
+          assert(r['clone_urls'].any? { |u| expect[:match].match u })
+        end
+      end
+    end
+  end
+
+  test "select push_url in index" do
+    authorize_with :active
+    get(:index, {select: ["uuid", "push_url"]})
+    assert_response :success
+    assert_includes(json_response["items"].map { |r| r["push_url"] },
+                    "git@git.zzzzz.arvadosapi.com:active/foo.git")
+  end
+
+  test "select clone_urls in index" do
+    authorize_with :active
+    get(:index, {select: ["uuid", "clone_urls"]})
+    assert_response :success
+    assert_includes(json_response["items"].map { |r| r["clone_urls"] }.flatten,
+                    "git@git.zzzzz.arvadosapi.com:active/foo.git")
+  end
 end
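The group-permission test above only constrains gitolite_permissions through regexp asserts: readable repos must match /R/, writable ones /RW/, and managed ones at least /RW/. One mapping consistent with those assertions (a sketch, not necessarily the server's exact implementation, which may add further flags):

    def gitolite_permissions(perms)
      return 'RW' if perms['can_manage'] || perms['can_write']
      return 'R'  if perms['can_read']
      ''
    end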
index 2d26370b749f5b07dc866855563c62cd31f9c03a..157e487859c927a978baad10c810592e36be9e77 100644 (file)
@@ -83,7 +83,7 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
 
   test "create user with user, vm and repo as input" do
     authorize_with :admin
-    repo_name = 'test_repo'
+    repo_name = 'usertestrepo'
 
     post :setup, {
       repo_name: repo_name,
@@ -113,7 +113,7 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
         created['uuid'], created['email'], 'arvados#user', false, 'User'
 
     verify_link response_items, 'arvados#repository', true, 'permission', 'can_manage',
-        repo_name, created['uuid'], 'arvados#repository', true, 'Repository'
+        "foo/#{repo_name}", created['uuid'], 'arvados#repository', true, 'Repository'
 
     verify_link response_items, 'arvados#group', true, 'permission', 'can_read',
         'All users', created['uuid'], 'arvados#group', true, 'Group'
@@ -129,7 +129,7 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
 
     post :setup, {
       uuid: 'bogus_uuid',
-      repo_name: 'test_repo',
+      repo_name: 'usertestrepo',
       vm_uuid: @vm_uuid
     }
     response_body = JSON.parse(@response.body)
@@ -143,7 +143,7 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
 
     post :setup, {
       user: {uuid: 'bogus_uuid'},
-      repo_name: 'test_repo',
+      repo_name: 'usertestrepo',
       vm_uuid: @vm_uuid,
       openid_prefix: 'https://www.google.com/accounts/o8/id'
     }
@@ -158,7 +158,7 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
     authorize_with :admin
 
     post :setup, {
-      repo_name: 'test_repo',
+      repo_name: 'usertestrepo',
       vm_uuid: @vm_uuid,
       openid_prefix: 'https://www.google.com/accounts/o8/id'
     }
@@ -174,7 +174,7 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
 
     post :setup, {
       user: {},
-      repo_name: 'test_repo',
+      repo_name: 'usertestrepo',
       vm_uuid: @vm_uuid,
       openid_prefix: 'https://www.google.com/accounts/o8/id'
     }
@@ -191,7 +191,7 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
 
     post :setup, {
       uuid: users(:inactive).uuid,
-      repo_name: 'test_repo',
+      repo_name: 'usertestrepo',
       vm_uuid: @vm_uuid
     }
 
@@ -207,7 +207,7 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
 
     # expect repo and vm links
     verify_link response_items, 'arvados#repository', true, 'permission', 'can_manage',
-        'test_repo', resp_obj['uuid'], 'arvados#repository', true, 'Repository'
+        'inactiveuser/usertestrepo', resp_obj['uuid'], 'arvados#repository', true, 'Repository'
 
     verify_link response_items, 'arvados#virtualMachine', true, 'permission', 'can_login',
         @vm_uuid, resp_obj['uuid'], 'arvados#virtualMachine', false, 'VirtualMachine'
@@ -257,7 +257,7 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
     authorize_with :admin
 
     post :setup, {
-      repo_name: 'test_repo',
+      repo_name: 'usertestrepo',
       user: {email: 'foo@example.com'},
       openid_prefix: 'https://www.google.com/accounts/o8/id'
     }
@@ -276,7 +276,7 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
     authorize_with :admin
 
     post :setup, {
-      repo_name: 'test_repo',
+      repo_name: 'usertestrepo',
       vm_uuid: 'no_such_vm',
       user: {email: 'foo@example.com'},
       openid_prefix: 'https://www.google.com/accounts/o8/id'
@@ -293,7 +293,7 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
     authorize_with :admin
 
     post :setup, {
-      repo_name: 'test_repo',
+      repo_name: 'usertestrepo',
       openid_prefix: 'https://www.google.com/accounts/o8/id',
       vm_uuid: @vm_uuid,
       user: {email: 'foo@example.com'}
@@ -333,7 +333,7 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
         'All users', response_object['uuid'], 'arvados#group', true, 'Group'
 
     verify_link response_items, 'arvados#repository', false, 'permission', 'can_manage',
-        'test_repo', response_object['uuid'], 'arvados#repository', true, 'Repository'
+        'foo/usertestrepo', response_object['uuid'], 'arvados#repository', true, 'Repository'
 
     verify_link response_items, 'arvados#virtualMachine', false, 'permission', 'can_login',
         nil, response_object['uuid'], 'arvados#virtualMachine', false, 'VirtualMachine'
@@ -344,7 +344,7 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
 
     post :setup, {
       openid_prefix: 'https://www.google.com/accounts/o8/id',
-      repo_name: 'test_repo',
+      repo_name: 'usertestrepo',
       vm_uuid: @vm_uuid,
       user: {
         first_name: 'test_first_name',
@@ -370,7 +370,7 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
 
     post :setup, {
       openid_prefix: 'https://www.google.com/accounts/o8/id',
-      repo_name: 'test_repo',
+      repo_name: 'usertestrepo',
       user: {
         email: inactive_user['email']
       }
@@ -391,7 +391,7 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
     authorize_with :admin
 
     post :setup, {
-      repo_name: 'test_repo',
+      repo_name: 'usertestrepo',
       openid_prefix: 'http://www.example.com/account',
       user: {
         first_name: "in_create_test_first_name",
@@ -418,7 +418,7 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
         created['uuid'], created['email'], 'arvados#user', false, 'User'
 
     verify_link response_items, 'arvados#repository', true, 'permission', 'can_manage',
-        'test_repo', created['uuid'], 'arvados#repository', true, 'Repository'
+        'foo/usertestrepo', created['uuid'], 'arvados#repository', true, 'Repository'
 
     verify_link response_items, 'arvados#group', true, 'permission', 'can_read',
         'All users', created['uuid'], 'arvados#group', true, 'Group'
@@ -431,7 +431,7 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
     authorize_with :admin
 
     post :setup, {
-      repo_name: 'test_repo',
+      repo_name: 'usertestrepo',
       user: {
         first_name: "in_create_test_first_name",
         last_name: "test_last_name",
@@ -456,7 +456,7 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
         email: "foo@example.com"
       },
       vm_uuid: @vm_uuid,
-      repo_name: 'test_repo',
+      repo_name: 'usertestrepo',
       openid_prefix: 'https://www.google.com/accounts/o8/id'
     }
 
@@ -478,7 +478,7 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
         created['uuid'], created['email'], 'arvados#user', false, 'User'
 
     verify_link response_items, 'arvados#repository', true, 'permission', 'can_manage',
-        'test_repo', created['uuid'], 'arvados#repository', true, 'Repository'
+        'foo/usertestrepo', created['uuid'], 'arvados#repository', true, 'Repository'
 
     verify_link response_items, 'arvados#group', true, 'permission', 'can_read',
         'All users', created['uuid'], 'arvados#group', true, 'Group'
@@ -522,7 +522,7 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
 
     # invoke setup with a repository
     post :setup, {
-      repo_name: 'new_repo',
+      repo_name: 'usertestrepo',
       uuid: active_user['uuid']
     }
 
@@ -538,7 +538,7 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
         'All users', created['uuid'], 'arvados#group', true, 'Group'
 
     verify_link response_items, 'arvados#repository', true, 'permission', 'can_manage',
-        'new_repo', created['uuid'], 'arvados#repository', true, 'Repository'
+        'active/usertestrepo', created['uuid'], 'arvados#repository', true, 'Repository'
 
     verify_link response_items, 'arvados#virtualMachine', false, 'permission', 'can_login',
         nil, created['uuid'], 'arvados#virtualMachine', false, 'VirtualMachine'
@@ -547,6 +547,11 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
   test "setup active user with vm and no repo" do
     authorize_with :admin
     active_user = users(:active)
+    repos_query = Repository.where(owner_uuid: active_user.uuid)
+    repo_link_query = Link.where(tail_uuid: active_user.uuid,
+                                 link_class: "permission", name: "can_manage")
+    repos_count = repos_query.count
+    repo_link_count = repo_link_query.count
 
     # invoke setup with a repository
     post :setup, {
@@ -566,8 +571,8 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
     verify_link response_items, 'arvados#group', true, 'permission', 'can_read',
         'All users', created['uuid'], 'arvados#group', true, 'Group'
 
-    verify_link response_items, 'arvados#repository', false, 'permission', 'can_manage',
-        'new_repo', created['uuid'], 'arvados#repository', true, 'Repository'
+    assert_equal(repos_count, repos_query.count)
+    assert_equal(repo_link_count, repo_link_query.count)
 
     verify_link response_items, 'arvados#virtualMachine', true, 'permission', 'can_login',
         @vm_uuid, created['uuid'], 'arvados#virtualMachine', false, 'VirtualMachine'
@@ -579,7 +584,7 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
     assert active_user['is_active'], 'expected is_active for active user'
 
     verify_link_existence active_user['uuid'], active_user['email'],
-          false, true, false, true, true
+          false, true, true, true, true
 
     authorize_with :admin
 
@@ -648,13 +653,10 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
 
     assert_equal Rails.configuration.user_notifier_email_from, setup_email.from[0]
     assert_equal 'foo@example.com', setup_email.to[0]
-    assert_equal 'Welcome to Curoverse', setup_email.subject
-    assert (setup_email.body.to_s.include? 'Your Arvados account has been set up'),
-        'Expected Your Arvados account has been set up in email body'
-    assert (setup_email.body.to_s.include? 'foo@example.com'),
-        'Expected user email in email body'
-    assert (setup_email.body.to_s.include? Rails.configuration.workbench_address),
-        'Expected workbench url in email body'
+    assert_equal 'Welcome to Curoverse - shell account enabled', setup_email.subject
+    assert (setup_email.body.to_s.include? 'Your Arvados shell account has been set up'),
+        'Expected Your Arvados shell account has been set up in email body'
+    assert(setup_email.body.to_s.include?("#{Rails.configuration.workbench_address}users/#{created['uuid']}/virtual_machines"),
+           'Expected virtual machines url in email body')
   end
 
   test "non-admin user can get basic information about readable users" do
@@ -740,17 +742,17 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
     authorize_with :admin
 
     user = {}
-    user[:prefs] = users(:active_no_prefs_profile).prefs
+    user[:prefs] = users(:active_no_prefs_profile_no_getting_started_shown).prefs
     user[:prefs][:profile] = {:profile => {'organization' => 'example.com'}}
     put :update, {
-      id: users(:active_no_prefs_profile).uuid,
+      id: users(:active_no_prefs_profile_no_getting_started_shown).uuid,
       user: user
     }
     assert_response :success
 
     found_email = false
     ActionMailer::Base.deliveries.andand.each do |email|
-      if email.subject == "Profile created by #{users(:active_no_prefs_profile).email}"
+      if email.subject == "Profile created by #{users(:active_no_prefs_profile_no_getting_started_shown).email}"
         found_email = true
         break
       end
index fd7431dc441475f7f16f5e4b0d966e9cead48170..329bc1589afc6a2298472d739524fc094e7f0723 100644 (file)
@@ -1,4 +1,75 @@
 require 'test_helper'
 
 class Arvados::V1::VirtualMachinesControllerTest < ActionController::TestCase
+  def get_logins_for(vm_sym)
+    authorize_with :admin
+    get(:logins, id: virtual_machines(vm_sym).uuid)
+  end
+
+  def find_login(sshkey_sym)
+    assert_response :success
+    want_key = authorized_keys(sshkey_sym).public_key
+    logins = json_response["items"].select do |login|
+      login["public_key"] == want_key
+    end
+    assert_equal(1, logins.size, "failed to find #{sshkey_sym} login")
+    logins.first
+  end
+
+  test "username propagated from permission" do
+    get_logins_for(:testvm2)
+    admin_login = find_login(:admin)
+    perm = links(:admin_can_login_to_testvm2)
+    assert_equal(perm.properties["username"], admin_login["username"])
+  end
+
+  test "groups propagated from permission" do
+    get_logins_for(:testvm2)
+    admin_login = find_login(:admin)
+    perm = links(:admin_can_login_to_testvm2)
+    assert_equal(perm.properties["groups"], admin_login["groups"])
+  end
+
+  test "groups is an empty list by default" do
+    get_logins_for(:testvm2)
+    active_login = find_login(:active)
+    perm = links(:active_can_login_to_testvm2)
+    assert_equal([], active_login["groups"])
+  end
+
+  test "logins without usernames not listed" do
+    get_logins_for(:testvm2)
+    assert_response :success
+    spectator_uuid = users(:spectator).uuid
+    assert_empty(json_response["items"].
+                 select { |login| login["user_uuid"] == spectator_uuid })
+  end
+
+  test "logins without ssh keys are listed" do
+    u, vm = nil
+    act_as_system_user do
+      u = create :active_user, first_name: 'Bob', last_name: 'Blogin'
+      vm = VirtualMachine.create! hostname: 'foo.shell'
+      Link.create!(tail_uuid: u.uuid,
+                   head_uuid: vm.uuid,
+                   link_class: 'permission',
+                   name: 'can_login',
+                   properties: {'username' => 'bobblogin'})
+    end
+    authorize_with :admin
+    get :logins, id: vm.uuid
+    assert_response :success
+    assert_equal 1, json_response['items'].length
+    assert_nil json_response['items'][0]['public_key']
+    assert_nil json_response['items'][0]['authorized_key_uuid']
+    assert_equal u.uuid, json_response['items'][0]['user_uuid']
+    assert_equal 'bobblogin', json_response['items'][0]['username']
+  end
+
+  test 'get all logins' do
+    authorize_with :admin
+    get :get_all_logins
+    find_login :admin
+    find_login :active
+  end
 end
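Taken together, these tests pin down where each login field comes from: one login entry per can_login permission link, with username and groups read from the link's properties (groups defaulting to an empty list), and the public key joined in from the user's AuthorizedKey rows when present. A sketch of that shape, assuming a Link-like object:

    def login_for(link)
      {
        'user_uuid' => link.tail_uuid,                   # the user side of the link
        'username'  => link.properties['username'],      # required for the login to be listed
        'groups'    => link.properties['groups'] || [],  # empty list by default
      }
    end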
index 67e99c18dcfb25340153ace9e4d29674af17bb16..6fce321953d3add8138056306629500927771f65 100644 (file)
@@ -14,15 +14,47 @@ require 'tmpdir'
 module GitTestHelper
   def self.included base
     base.setup do
-      @tmpdir = Dir.mktmpdir()
-      system("tar", "-xC", @tmpdir, "-f", "test/test.git.tar")
+      # Extract the test repository data into the default test
+      # environment's Rails.configuration.git_repositories_dir. (We
+      # don't use that config setting here, though: it doesn't seem
+      # worth the risk of stepping on a real git repo root.)
+      @tmpdir = Rails.root.join 'tmp', 'git'
+      FileUtils.mkdir_p @tmpdir
+      system("tar", "-xC", @tmpdir.to_s, "-f", "test/test.git.tar")
       Rails.configuration.git_repositories_dir = "#{@tmpdir}/test"
-      Commit.refresh_repositories
+
+      intdir = Rails.configuration.git_internal_dir
+      if not File.exist? intdir
+        FileUtils.mkdir_p intdir
+        IO.read("|git --git-dir #{intdir.to_s.shellescape} init")
+        assert $?.success?
+      end
     end
 
     base.teardown do
       FileUtils.remove_entry @tmpdir, true
-      Commit.refresh_repositories
+      FileUtils.remove_entry Commit.cache_dir_base, true
+    end
+  end
+
+  def internal_tag tag
+    IO.read "|git --git-dir #{Rails.configuration.git_internal_dir.shellescape} log --format=format:%H -n1 #{tag.shellescape}"
+  end
+
+  # Intercept fetch_remote_repository and fetch from a specified url
+  # or local fixture instead of the remote url requested. fakeurl can
+  # be a url (probably starting with file:///) or the name of a
+  # fixture (as a symbol).
+  def fetch_remote_from_local_repo url, fakeurl
+    if fakeurl.is_a? Symbol
+      fakeurl = 'file://' + repositories(fakeurl).server_path
+    end
+    Commit.expects(:fetch_remote_repository).once.with do |gitdir, giturl|
+      if giturl == url
+        Commit.unstub(:fetch_remote_repository)
+        Commit.fetch_remote_repository gitdir, fakeurl
+        true
+      end
     end
   end
 end
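The mocha expectation above does the interception: its with-block fires on every fetch_remote_repository call, and when the requested URL matches, it unstubs the method and performs a real fetch from the substituted local fixture. Typical usage, taken from the jobs controller tests earlier in this diff:

    url = "http://localhost:1/fake/fake.git"
    fetch_remote_from_local_repo url, :foo  # fetches of url now read the :foo fixture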
diff --git a/services/api/test/helpers/manifest_examples.rb b/services/api/test/helpers/manifest_examples.rb
new file mode 100644 (file)
index 0000000..5acefe9
--- /dev/null
@@ -0,0 +1,31 @@
+module ManifestExamples
+  def make_manifest opts={}
+    opts = {
+      bytes_per_block: 1,
+      blocks_per_file: 1,
+      files_per_stream: 1,
+      streams: 1,
+    }.merge(opts)
+    datablip = "x" * opts[:bytes_per_block]
+    locator = Blob.sign_locator(Digest::MD5.hexdigest(datablip) +
+                                '+' + datablip.length.to_s,
+                                api_token: opts[:api_token])
+    filesize = datablip.length * opts[:blocks_per_file]
+    txt = ''
+    (1..opts[:streams]).each do |s|
+      streamtoken = "./stream#{s}"
+      streamsize = 0
+      blocktokens = []
+      filetokens = []
+      (1..opts[:files_per_stream]).each do |f|
+        filetokens << "#{streamsize}:#{filesize}:file#{f}.txt"
+        (1..opts[:blocks_per_file]).each do |b|
+          blocktokens << locator
+        end
+        streamsize += filesize
+      end
+      txt << ([streamtoken] + blocktokens + filetokens).join(' ') + "\n"
+    end
+    txt
+  end
+end
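make_manifest emits Keep manifest text: each line is a stream name, one or more block locators, then file tokens of the form position:size:name. With streams: 1, files_per_stream: 2, blocks_per_file: 1 and bytes_per_block: 1, the output is one line shaped like this (signature hint omitted for brevity; note the block token is repeated once per file):

    ./stream1 9dd4e461268c8034f5c8564e155c67a6+1 9dd4e461268c8034f5c8564e155c67a6+1 0:1:file1.txt 1:1:file2.txt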
diff --git a/services/api/test/helpers/time_block.rb b/services/api/test/helpers/time_block.rb
new file mode 100644 (file)
index 0000000..a3b03ff
--- /dev/null
@@ -0,0 +1,11 @@
+class ActiveSupport::TestCase
+  def time_block label
+    t0 = Time.now
+    begin
+      yield
+    ensure
+      t1 = Time.now
+      $stderr.puts "#{t1 - t0}s #{label}"
+    end
+  end
+end
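time_block wraps a block with wall-clock timing and reports to stderr even if the block raises, thanks to the ensure clause. Usage mirrors the performance tests below:

    time_block 'create' do
      post '/arvados/v1/collections', {collection: json}, auth(:active)
      assert_response :success
    end
    # prints e.g. "1.273s create" to stderr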
diff --git a/services/api/test/integration/collections_performance_test.rb b/services/api/test/integration/collections_performance_test.rb
new file mode 100644 (file)
index 0000000..892060a
--- /dev/null
@@ -0,0 +1,40 @@
+require 'test_helper'
+require 'helpers/manifest_examples'
+require 'helpers/time_block'
+
+class CollectionsApiPerformanceTest < ActionDispatch::IntegrationTest
+  include ManifestExamples
+
+  test "crud cycle for a collection with a big manifest" do
+    bigmanifest = time_block 'make example' do
+      make_manifest(streams: 100,
+                    files_per_stream: 100,
+                    blocks_per_file: 20,
+                    bytes_per_block: 2**26,
+                    api_token: api_token(:active))
+    end
+    json = time_block "JSON encode #{bigmanifest.length>>20}MiB manifest" do
+      Oj.dump({manifest_text: bigmanifest})
+    end
+    time_block 'create' do
+      post '/arvados/v1/collections', {collection: json}, auth(:active)
+      assert_response :success
+    end
+    uuid = json_response['uuid']
+    time_block 'read' do
+      get '/arvados/v1/collections/' + uuid, {}, auth(:active)
+      assert_response :success
+    end
+    time_block 'list' do
+      get '/arvados/v1/collections', {select: ['manifest_text'], filters: [['uuid', '=', uuid]].to_json}, auth(:active)
+      assert_response :success
+    end
+    time_block 'update' do
+      put '/arvados/v1/collections/' + uuid, {collection: json}, auth(:active)
+      assert_response :success
+    end
+    time_block 'delete' do
+      delete '/arvados/v1/collections/' + uuid, {}, auth(:active)
+    end
+  end
+end
index 81767af905cc609f3cfc18a56b404446cbb10bb1..a6f937bbda66c0efa3a52630e15a0b37278579b2 100644 (file)
@@ -28,7 +28,7 @@ class CrunchDispatchTest < ActionDispatch::IntegrationTest
       format: "json",
       job: {
         script: "log",
-        repository: "crunch_dispatch_test",
+        repository: "active/crunchdispatchtest",
         script_version: "f35f99b7d32bac257f5989df02b9f12ee1a9b0d6",
         script_parameters: "{}"
       }
index 8a1cb10004f2bcfedf379d3d944c4251828fd02f..36c533a9bbe8d1deb1366b971ac325bd08a37978 100644 (file)
@@ -12,7 +12,7 @@ class SerializedEncodingTest < ActionDispatch::IntegrationTest
     human: {properties: {eye_color: 'gray'}},
 
     job: {
-      repository: 'foo',
+      repository: 'active/foo',
       runtime_constraints: {docker_image: 'arvados/apitestfixture'},
       script: 'hash',
       script_version: 'master',
index 0d6c0f360f3156ae15e32f0098c510afb00e796f..38ac12267aaf8ec6b894835ae0f5876aff9e04d2 100644 (file)
@@ -5,7 +5,7 @@ class UsersTest < ActionDispatch::IntegrationTest
   include UsersTestHelper
 
   test "setup user multiple times" do
-    repo_name = 'test_repo'
+    repo_name = 'usertestrepo'
 
     post "/arvados/v1/users/setup", {
       repo_name: repo_name,
@@ -35,7 +35,7 @@ class UsersTest < ActionDispatch::IntegrationTest
         created['uuid'], created['email'], 'arvados#user', false, 'arvados#user'
 
     verify_link response_items, 'arvados#repository', true, 'permission', 'can_manage',
-        repo_name, created['uuid'], 'arvados#repository', true, 'Repository'
+        'foo/usertestrepo', created['uuid'], 'arvados#repository', true, 'Repository'
 
     verify_link response_items, 'arvados#group', true, 'permission', 'can_read',
         'All users', created['uuid'], 'arvados#group', true, 'Group'
@@ -71,7 +71,7 @@ class UsersTest < ActionDispatch::IntegrationTest
 
     # arvados#user, repo link and link add user to 'All users' group
     verify_link response_items, 'arvados#repository', true, 'permission', 'can_manage',
-        repo_name, created['uuid'], 'arvados#repository', true, 'Repository'
+        'foo/usertestrepo', created['uuid'], 'arvados#repository', true, 'Repository'
 
     verify_link response_items, 'arvados#group', true, 'permission', 'can_read',
         'All users', created['uuid'], 'arvados#group', true, 'Group'
@@ -105,16 +105,13 @@ class UsersTest < ActionDispatch::IntegrationTest
     verify_link response_items, 'arvados#group', true, 'permission', 'can_read',
         'All users', created['uuid'], 'arvados#group', true, 'Group'
 
-    verify_link response_items, 'arvados#repository', false, 'permission', 'can_manage',
-        'test_repo', created['uuid'], 'arvados#repository', true, 'Repository'
-
     verify_link response_items, 'arvados#virtualMachine', false, 'permission', 'can_login',
         nil, created['uuid'], 'arvados#virtualMachine', false, 'VirtualMachine'
 
    # invoke setup with a repository
     post "/arvados/v1/users/setup", {
       openid_prefix: 'http://www.example.com/account',
-      repo_name: 'new_repo',
+      repo_name: 'newusertestrepo',
       uuid: created['uuid']
     }, auth(:admin)
 
@@ -130,7 +127,7 @@ class UsersTest < ActionDispatch::IntegrationTest
         'All users', created['uuid'], 'arvados#group', true, 'Group'
 
     verify_link response_items, 'arvados#repository', true, 'permission', 'can_manage',
-        'new_repo', created['uuid'], 'arvados#repository', true, 'Repository'
+        'foo/newusertestrepo', created['uuid'], 'arvados#repository', true, 'Repository'
 
     verify_link response_items, 'arvados#virtualMachine', false, 'permission', 'can_login',
         nil, created['uuid'], 'arvados#virtualMachine', false, 'VirtualMachine'
@@ -156,17 +153,13 @@ class UsersTest < ActionDispatch::IntegrationTest
     verify_link response_items, 'arvados#group', true, 'permission', 'can_read',
         'All users', created['uuid'], 'arvados#group', true, 'Group'
 
-    # since no repo name in input, we won't get any; even though user has one
-    verify_link response_items, 'arvados#repository', false, 'permission', 'can_manage',
-        'new_repo', created['uuid'], 'arvados#repository', true, 'Repository'
-
     verify_link response_items, 'arvados#virtualMachine', true, 'permission', 'can_login',
         virtual_machines(:testvm).uuid, created['uuid'], 'arvados#virtualMachine', false, 'VirtualMachine'
   end
 
   test "setup and unsetup user" do
     post "/arvados/v1/users/setup", {
-      repo_name: 'test_repo',
+      repo_name: 'newusertestrepo',
       vm_uuid: virtual_machines(:testvm).uuid,
       user: {email: 'foo@example.com'},
       openid_prefix: 'https://www.google.com/accounts/o8/id'
@@ -186,7 +179,7 @@ class UsersTest < ActionDispatch::IntegrationTest
         'All users', created['uuid'], 'arvados#group', true, 'Group'
 
     verify_link response_items, 'arvados#repository', true, 'permission', 'can_manage',
-        'test_repo', created['uuid'], 'arvados#repository', true, 'Repository'
+        'foo/newusertestrepo', created['uuid'], 'arvados#repository', true, 'Repository'
 
     verify_link response_items, 'arvados#virtualMachine', true, 'permission', 'can_login',
         virtual_machines(:testvm).uuid, created['uuid'], 'arvados#virtualMachine', false, 'VirtualMachine'
index 9179acd6803fa2fcb0384a507103b9bf390a5d15..c4d6d5eb7e6eb54eaba2c830035321656668aa33 100644 (file)
@@ -84,7 +84,7 @@ class WebsocketTest < ActionDispatch::IntegrationTest
     assert_equal 200, status
   end
 
-  test "connect, subscribe, get event" do
+  def subscribe_test
     state = 1
     spec = nil
     ev_uuid = nil
@@ -115,6 +115,10 @@ class WebsocketTest < ActionDispatch::IntegrationTest
     assert_equal spec.uuid, ev_uuid
   end
 
+  test "connect, subscribe, get event" do
+    subscribe_test()
+  end
+
   test "connect, subscribe, get two events" do
     state = 1
     spec = nil
@@ -646,4 +650,45 @@ class WebsocketTest < ActionDispatch::IntegrationTest
   end
 
 
+  test "connect, subscribe with invalid filter" do
+    state = 1
+    human = nil
+    human_ev_uuid = nil
+
+    authorize_with :admin
+
+    ws_helper :admin do |ws|
+      ws.on :open do |event|
+        # test that #6451 is fixed (invalid filter crashes websockets)
+        ws.send({method: 'subscribe', filters: [['object_blarg', 'is_a', 'arvados#human']]}.to_json)
+      end
+
+      ws.on :message do |event|
+        d = Oj.load event.data
+        case state
+        when 1
+          assert_equal 200, d["status"]
+          Specimen.create
+          human = Human.create
+          state = 2
+        when 2
+          assert_equal 500, d["status"]
+          state = 3
+          ws.close
+        when 3
+          assert false, "Should not get any more events"
+        end
+      end
+
+    end
+
+    assert_equal 3, state
+
+    # Try connecting again, ensure that websockets server is still running and
+    # didn't crash per #6451
+    subscribe_test()
+
+  end
+
+
 end
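The new websocket test documents the protocol behavior around bad filters: the subscribe message itself is acknowledged with status 200, and only when a later event is matched against the invalid filter does the server respond with status 500 instead of crashing outright (the #6451 regression). The messages involved, as used in these tests:

    # client -> server
    {method: 'subscribe', filters: [['object_uuid', 'is_a', 'arvados#human']]}.to_json
    # server -> client: {"status":200} on subscribe; {"status":500} when an
    # event fails to match because the filter column is invalid.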
diff --git a/services/api/test/tasks/delete_old_job_logs_test.rb b/services/api/test/tasks/delete_old_job_logs_test.rb
new file mode 100644 (file)
index 0000000..b922fb3
--- /dev/null
@@ -0,0 +1,50 @@
+require 'test_helper'
+require 'rake'
+
+Rake.application.rake_require "tasks/delete_old_job_logs"
+Rake::Task.define_task(:environment)
+
+class DeleteOldJobLogsTaskTest < ActiveSupport::TestCase
+  TASK_NAME = "db:delete_old_job_logs"
+
+  def log_uuids(*fixture_names)
+    fixture_names.map { |name| logs(name).uuid }
+  end
+
+  def run_with_expiry(clean_after)
+    Rails.configuration.clean_job_log_rows_after = clean_after
+    Rake::Task[TASK_NAME].reenable
+    Rake.application.invoke_task TASK_NAME
+  end
+
+  def job_stderr_logs
+    Log.where("object_uuid LIKE :pattern AND event_type = :etype",
+              pattern: "_____-8i9sb-_______________",
+              etype: "stderr")
+  end
+
+  def check_existence(test_method, fixture_uuids)
+    uuids_now = job_stderr_logs.map(&:uuid)
+    fixture_uuids.each do |expect_uuid|
+      send(test_method, uuids_now, expect_uuid)
+    end
+  end
+
+  test "delete all logs" do
+    uuids_to_keep = log_uuids(:crunchstat_for_running_job)
+    uuids_to_clean = log_uuids(:crunchstat_for_previous_job,
+                               :crunchstat_for_ancient_job)
+    run_with_expiry(1)
+    check_existence(:assert_includes, uuids_to_keep)
+    check_existence(:refute_includes, uuids_to_clean)
+  end
+
+  test "delete only old logs" do
+    uuids_to_keep = log_uuids(:crunchstat_for_running_job,
+                              :crunchstat_for_previous_job)
+    uuids_to_clean = log_uuids(:crunchstat_for_ancient_job)
+    run_with_expiry(360.days)
+    check_existence(:assert_includes, uuids_to_keep)
+    check_existence(:refute_includes, uuids_to_clean)
+  end
+end
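The rake task itself is not part of this diff; what the assertions pin down is that db:delete_old_job_logs removes stderr Log rows belonging to jobs whose logs are older than clean_job_log_rows_after, while leaving newer rows alone. A plausible sketch, hypothetical apart from the names this test file uses:

    namespace :db do
      desc "Remove old job stderr logs"
      task delete_old_job_logs: :environment do
        threshold = Time.now - Rails.configuration.clean_job_log_rows_after
        Log.where("object_uuid LIKE '_____-8i9sb-_______________'").
          where(event_type: 'stderr').
          where("created_at < ?", threshold).
          delete_all
      end
    end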
index ae466016a3ab658bf133bcb3d5f0ce9358900a05..faa0d656d392c1862349c69234ae408ee8dbe738 100644 (file)
Binary files a/services/api/test/test.git.tar and b/services/api/test/test.git.tar differ
index 5ea6e62bfa73381e0f7e95b79aba31be7143ee08..68d4bbf5af4b03349b11259f82357e917dd52cf7 100644 (file)
@@ -22,6 +22,7 @@ end
 
 require File.expand_path('../../config/environment', __FILE__)
 require 'rails/test_help'
+require 'mocha/mini_test'
 
 module ArvadosTestSupport
   def json_response
@@ -52,6 +53,25 @@ class ActiveSupport::TestCase
     restore_configuration
   end
 
+  def assert_not_allowed
+    # Provide a block that returns a Rails-style boolean success value,
+    # like model.save or model.destroy.  This method asserts that the call
+    # either returns false or raises a PermissionDeniedError.
+    begin
+      refute(yield)
+    rescue ArvadosModel::PermissionDeniedError
+    end
+  end
+
+  def add_permission_link from_who, to_what, perm_type
+    act_as_system_user do
+      Link.create!(tail_uuid: from_who.uuid,
+                   head_uuid: to_what.uuid,
+                   link_class: 'permission',
+                   name: perm_type)
+    end
+  end
+
   def restore_configuration
     # Restore configuration settings changed during tests
     $application_config.each do |k,v|
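Typical use of the two helpers added above (the fixture names in this example are illustrative, not taken from this diff):

    add_permission_link users(:spectator), groups(:private), 'can_read'
    assert_not_allowed { collections(:foo_file).destroy }  # false or PermissionDeniedError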
index 0418a94510d9e046564cd5b49db5f659bd194330..6918aa0d00058b4d6183e92c89506aacb45a3f85 100644 (file)
@@ -131,7 +131,7 @@ class ArvadosModelTest < ActiveSupport::TestCase
         search_index_columns = table_class.searchable_columns('ilike')
         # Disappointing, but text columns aren't indexed yet.
         search_index_columns -= table_class.columns.select { |c|
-          c.type == :text or c.name == 'description'
+          c.type == :text or c.name == 'description' or c.name == 'file_names'
         }.collect(&:name)
 
         indexes = ActiveRecord::Base.connection.indexes(table)
index b8d9b6786cc951a7a37f23e2126a39ac70fd2a78..5a661785bd7bef903747b5890bb135b8bacaebf1 100644 (file)
@@ -1,7 +1,47 @@
 require 'test_helper'
 
 class AuthorizedKeyTest < ActiveSupport::TestCase
-  # test "the truth" do
-  #   assert true
-  # end
+  TEST_KEY = "ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCf5aTI55uyWr44TckP/ELUAyPsdnf5fTZDcSDN4qiMZYAL7TYV2ixwnbPObLObM0GmHSSFLV1KqsuFICUPgkyKoHbAH6XPgmtfOLU60VkGf1v5uxQ/kXCECRCJmPb3K9dIXGEw+1DXPdOV/xG7rJNvo4a9WK9iqqZr8p+VGKM6C017b8BDLk0tuEEjZ5jXcT/ka/hTScxWkKgF6auPOVQ79OA5+0VaYm4uQLzVUdgwVUPWQQecRrtnc08XYM1htpcLDIAbWfUNK7uE6XR3/OhtrJGf05FGbtGguPgi33F9W3Q3yw6saOK5Y3TfLbskgFaEdLgzqK/QSBRk2zBF49Tj test@localhost"
+
+  test 'create and update key' do
+    u1 = users(:active)
+    act_as_user u1 do
+      ak = AuthorizedKey.new(name: "foo", public_key: TEST_KEY, authorized_user_uuid: u1.uuid)
+      assert ak.save, ak.errors.full_messages.to_s
+      ak.name = "bar"
+      assert ak.valid?, ak.errors.full_messages.to_s
+      assert ak.save, ak.errors.full_messages.to_s
+    end
+  end
+
+  test 'duplicate key not permitted' do
+    u1 = users(:active)
+    act_as_user u1 do
+      ak = AuthorizedKey.new(name: "foo", public_key: TEST_KEY, authorized_user_uuid: u1.uuid)
+      assert ak.save
+    end
+    u2 = users(:spectator)
+    act_as_user u2 do
+      ak2 = AuthorizedKey.new(name: "bar", public_key: TEST_KEY, authorized_user_uuid: u2.uuid)
+      refute ak2.valid?
+      refute ak2.save
+      assert_match /already exists/, ak2.errors.full_messages.to_s
+    end
+  end
+
+  test 'attach key to wrong user account' do
+    act_as_user users(:active) do
+      ak = AuthorizedKey.new(name: "foo", public_key: TEST_KEY)
+      ak.authorized_user_uuid = users(:spectator).uuid
+      refute ak.save
+      ak.uuid = nil
+      ak.authorized_user_uuid = users(:admin).uuid
+      refute ak.save
+      ak.uuid = nil
+      ak.authorized_user_uuid = users(:active).uuid
+      assert ak.save, ak.errors.full_messages.to_s
+      ak.authorized_user_uuid = users(:admin).uuid
+      refute ak.save
+    end
+  end
 end
diff --git a/services/api/test/unit/collection_performance_test.rb b/services/api/test/unit/collection_performance_test.rb
new file mode 100644 (file)
index 0000000..37da5fd
--- /dev/null
@@ -0,0 +1,62 @@
+require 'test_helper'
+require 'helpers/manifest_examples'
+require 'helpers/time_block'
+
+class CollectionModelPerformanceTest < ActiveSupport::TestCase
+  include ManifestExamples
+
+  setup do
+    # The Collection model needs to have a current token, not just a
+    # current user, to sign & verify manifests:
+    Thread.current[:api_client_authorization] =
+      api_client_authorizations(:active)
+  end
+
+  teardown do
+    Thread.current[:api_client_authorization] = nil
+  end
+
+  # "crrud" == "create read render update delete", not a typo
+  test "crrud cycle for a collection with a big manifest)" do
+    bigmanifest = time_block 'make example' do
+      make_manifest(streams: 100,
+                    files_per_stream: 100,
+                    blocks_per_file: 20,
+                    bytes_per_block: 2**26,
+                    api_token: api_token(:active))
+    end
+    act_as_user users(:active) do
+      c = time_block "new (manifest_text is #{bigmanifest.length>>20}MiB)" do
+        Collection.new manifest_text: bigmanifest.dup
+      end
+      time_block 'check signatures' do
+        c.check_signatures
+      end
+      time_block 'check signatures + save' do
+        c.instance_eval do @signatures_checked = false end
+        c.save!
+      end
+      c = time_block 'read' do
+        Collection.find_by_uuid(c.uuid)
+      end
+      time_block 'sign' do
+        c.signed_manifest_text
+      end
+      time_block 'sign + render' do
+        resp = c.as_api_response(nil)
+      end
+      loc = Blob.sign_locator(Digest::MD5.hexdigest('foo') + '+3',
+                              api_token: api_token(:active))
+      # Note Collection's strip_manifest_text method has now removed
+      # the signatures from c.manifest_text, so we have to start from
+      # bigmanifest again here instead of just appending with "+=".
+      c.manifest_text = bigmanifest.dup + ". #{loc} 0:3:foo.txt\n"
+      time_block 'update' do
+        c.save!
+      end
+      time_block 'delete' do
+        c.destroy
+      end
+    end
+  end
+end
index d8b8365efa212f3447aceddec6decd2154520584..c81d543ebb02403866535561fd7b179aba6e8adb 100644 (file)
@@ -39,6 +39,66 @@ class CollectionTest < ActiveSupport::TestCase
     end
   end
 
+  [
+    ". 0:0:foo.txt",
+    ". d41d8cd98f00b204e9800998ecf8427e foo.txt",
+    "d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt",
+    ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt",
+  ].each do |manifest_text|
+    test "create collection with invalid manifest text #{manifest_text} and expect error" do
+      act_as_system_user do
+        c = Collection.create(manifest_text: manifest_text)
+        assert !c.valid?
+      end
+    end
+  end
+
+  [
+    nil,
+    "",
+    ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt\n",
+  ].each do |manifest_text|
+    test "create collection with valid manifest text #{manifest_text.inspect} and expect success" do
+      act_as_system_user do
+        c = Collection.create(manifest_text: manifest_text)
+        assert c.valid?
+      end
+    end
+  end
+
+  [
+    ". 0:0:foo.txt",
+    ". d41d8cd98f00b204e9800998ecf8427e foo.txt",
+    "d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt",
+    ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt",
+  ].each do |manifest_text|
+    test "update collection with invalid manifest text #{manifest_text} and expect error" do
+      act_as_system_user do
+        c = create_collection 'foo', Encoding::US_ASCII
+        assert c.valid?
+
+        c.update_attribute 'manifest_text', manifest_text
+        assert !c.valid?
+      end
+    end
+  end
+
+  [
+    nil,
+    "",
+    ". d41d8cd98f00b204e9800998ecf8427e 0:0:foo.txt\n",
+  ].each do |manifest_text|
+    test "update collection with valid manifest text #{manifest_text.inspect} and expect success" do
+      act_as_system_user do
+        c = create_collection 'foo', Encoding::US_ASCII
+        assert c.valid?
+
+        c.update_attribute 'manifest_text', manifest_text
+        assert c.valid?
+      end
+    end
+  end
+
   test 'create and update collection and verify file_names' do
     act_as_system_user do
       c = create_collection 'foo', Encoding::US_ASCII
@@ -56,27 +116,27 @@ class CollectionTest < ActiveSupport::TestCase
   [
     [2**8, false],
     [2**18, true],
-  ].each do |manifest_size, gets_truncated|
-    test "create collection with manifest size #{manifest_size} which gets truncated #{gets_truncated},
+  ].each do |manifest_size, allow_truncate|
+    test "create collection with manifest size #{manifest_size} with allow_truncate=#{allow_truncate},
           and not expect exceptions even on very large manifest texts" do
       # file_names has a max size, hence there will be no errors even on large manifests
       act_as_system_user do
-        manifest_text = './blurfl d41d8cd98f00b204e9800998ecf8427e+0'
+        manifest_text = ''
         index = 0
         while manifest_text.length < manifest_size
-          manifest_text += ' ' + "0:0:veryverylongfilename000000000000#{index}.txt\n./subdir1"
+          manifest_text += "./blurfl d41d8cd98f00b204e9800998ecf8427e+0 0:0:veryverylongfilename000000000000#{index}.txt\n"
           index += 1
         end
-        manifest_text += "\n"
+        manifest_text += "./laststreamname d41d8cd98f00b204e9800998ecf8427e+0 0:0:veryverylastfilename.txt\n"
         c = Collection.create(manifest_text: manifest_text)
 
         assert c.valid?
         assert c.file_names
         assert_match /veryverylongfilename0000000000001.txt/, c.file_names
         assert_match /veryverylongfilename0000000000002.txt/, c.file_names
-        if !gets_truncated
-          assert_match /blurfl/, c.file_names
-          assert_match /subdir1/, c.file_names
+        if not allow_truncate
+          assert_match /veryverylastfilename/, c.file_names
+          assert_match /laststreamname/, c.file_names
         end
       end
     end
@@ -87,7 +147,7 @@ class CollectionTest < ActiveSupport::TestCase
     act_as_system_user do
       Collection.create(manifest_text: ". acbd18db4cc2f85cedef654fccc4a4d8+3 0:3:foo\n")
       Collection.create(manifest_text: ". 37b51d194a7513e45b56f6524f2d51f2+3 0:3:bar\n")
-      Collection.create(manifest_text: ". 85877ca2d7e05498dd3d109baf2df106+95+A3a4e26a366ee7e4ed3e476ccf05354761be2e4ae@545a9920 0:95:file_in_subdir1\n./subdir2/subdir3 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file1.txt 32:32:file2.txt\n./subdir2/subdir3/subdir4 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file3.txt 32:32:file4.txt")
+      Collection.create(manifest_text: ". 85877ca2d7e05498dd3d109baf2df106+95+A3a4e26a366ee7e4ed3e476ccf05354761be2e4ae@545a9920 0:95:file_in_subdir1\n./subdir2/subdir3 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file1.txt 32:32:file2.txt\n./subdir2/subdir3/subdir4 2bbc341c702df4d8f42ec31f16c10120+64+A315d7e7bad2ce937e711fc454fae2d1194d14d64@545a9920 0:32:file3.txt 32:32:file4.txt\n")
     end
 
     [
@@ -120,12 +180,12 @@ class CollectionTest < ActiveSupport::TestCase
   end
 
   test 'portable data hash with missing size hints' do
-    [[". d41d8cd98f00b204e9800998ecf8427e+0+Bar 0:0:x",
-      ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x"],
-     [". d41d8cd98f00b204e9800998ecf8427e+Foo 0:0:x",
-      ". d41d8cd98f00b204e9800998ecf8427e 0:0:x"],
-     [". d41d8cd98f00b204e9800998ecf8427e 0:0:x",
-      ". d41d8cd98f00b204e9800998ecf8427e 0:0:x"],
+    [[". d41d8cd98f00b204e9800998ecf8427e+0+Bar 0:0:x\n",
+      ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n"],
+     [". d41d8cd98f00b204e9800998ecf8427e+Foo 0:0:x\n",
+      ". d41d8cd98f00b204e9800998ecf8427e 0:0:x\n"],
+     [". d41d8cd98f00b204e9800998ecf8427e 0:0:x\n",
+      ". d41d8cd98f00b204e9800998ecf8427e 0:0:x\n"],
     ].each do |unportable, portable|
       c = Collection.new(manifest_text: unportable)
       assert c.valid?
@@ -134,6 +194,26 @@ class CollectionTest < ActiveSupport::TestCase
     end
   end
 
+  pdhmanifest = ". d41d8cd98f00b204e9800998ecf8427e+0 0:0:x\n"
+  pdhmd5 = Digest::MD5.hexdigest pdhmanifest
+  [[true, nil],
+   [true, pdhmd5],
+   [true, pdhmd5+'+12345'],
+   [true, pdhmd5+'+'+pdhmanifest.length.to_s],
+   [true, pdhmd5+'+12345+Foo'],
+   [true, pdhmd5+'+Foo'],
+   [false, Digest::MD5.hexdigest(pdhmanifest.strip)],
+   [false, Digest::MD5.hexdigest(pdhmanifest.strip)+'+'+pdhmanifest.length.to_s],
+   [false, pdhmd5[0..30]],
+   [false, pdhmd5[0..30]+'z'],
+   [false, pdhmd5[0..24]+'000000000'],
+   [false, pdhmd5[0..24]+'000000000+0']].each do |isvalid, pdh|
+    test "portable_data_hash #{pdh.inspect} valid? == #{isvalid}" do
+      c = Collection.new manifest_text: pdhmanifest, portable_data_hash: pdh
+      assert_equal isvalid, c.valid?, c.errors.full_messages.to_s
+    end
+  end
+
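+  # For reference, the valid cases above all follow the portable data
+  # hash form: the MD5 hex digest of the manifest text, optionally
+  # followed by "+<size>" and other "+hint" fields (or nil, letting the
+  # server compute it) -- e.g. (sketch mirroring the cases above):
+  #
+  #   Digest::MD5.hexdigest(manifest) + '+' + manifest.length.to_s
+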
   [0, 2, 4, nil].each do |ask|
     test "set replication_desired to #{ask.inspect}" do
       Rails.configuration.default_collection_replication = 2
@@ -253,4 +333,11 @@ class CollectionTest < ActiveSupport::TestCase
       assert c.valid?
     end
   end
+
+  test "find_all_for_docker_image resolves names that look like hashes" do
+    coll_list = Collection.
+      find_all_for_docker_image('a' * 64, nil, [users(:active)])
+    coll_uuids = coll_list.map(&:uuid)
+    assert_includes(coll_uuids, collections(:docker_image).uuid)
+  end
 end
index 2424af32755cf08c4eefbb96cecc081d35f68ef0..b57c23b4538dee4339a0a27630a1ad36e7e575a6 100644 (file)
@@ -1,7 +1,217 @@
 require 'test_helper'
+require 'helpers/git_test_helper'
+
+# NOTE: calling Commit.find_commit_range(nil, nil, 'rev')
+# produces an error message "fatal: bad object 'rev'" on stderr if
+# 'rev' does not exist in a given repository.  Many of these tests
+# report such errors; their presence does not represent a fatal
+# condition.
 
 class CommitTest < ActiveSupport::TestCase
-  # test "the truth" do
-  #   assert true
-  # end
+  # See git_setup.rb for the commit log of test.git.tar
+  include GitTestHelper
+
+  setup do
+    authorize_with :active
+  end
+
+  test 'find_commit_range does not bypass permissions' do
+    authorize_with :inactive
+    assert_raises ArgumentError do
+      c = Commit.find_commit_range 'foo', nil, 'master', []
+    end
+  end
+
+  [
+   'https://github.com/curoverse/arvados.git',
+   'http://github.com/curoverse/arvados.git',
+   'git://github.com/curoverse/arvados.git',
+  ].each do |url|
+    test "find_commit_range uses fetch_remote_repository to get #{url}" do
+      fake_gitdir = repositories(:foo).server_path
+      Commit.expects(:cache_dir_for).once.with(url).returns fake_gitdir
+      Commit.expects(:fetch_remote_repository).once.with(fake_gitdir, url).returns true
+      c = Commit.find_commit_range url, nil, 'master', []
+      refute_empty c
+    end
+  end
+
+  [
+   'bogus/repo',
+   '/bogus/repo',
+   '/not/allowed/.git',
+   'file:///not/allowed.git',
+   'git.curoverse.com/arvados.git',
+   'github.com/curoverse/arvados.git',
+  ].each do |url|
+    test "find_commit_range skips fetch_remote_repository for #{url}" do
+      Commit.expects(:fetch_remote_repository).never
+      assert_raises ArgumentError do
+        Commit.find_commit_range url, nil, 'master', []
+      end
+    end
+  end
+
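+  # Taken together: only absolute http(s):// and git:// URLs trigger a
+  # remote fetch; anything else must resolve to a known local
+  # repository or raise ArgumentError without fetching.
+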
+  test 'fetch_remote_repository does not leak commits across repositories' do
+    url = "http://localhost:1/fake/fake.git"
+    fetch_remote_from_local_repo url, :foo
+    c = Commit.find_commit_range url, nil, 'master', []
+    assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57'], c
+
+    url = "http://localhost:2/fake/fake.git"
+    fetch_remote_from_local_repo url, 'file://' + File.expand_path('../../.git', Rails.root)
+    c = Commit.find_commit_range url, nil, '077ba2ad3ea24a929091a9e6ce545c93199b8e57', []
+    assert_equal [], c
+  end
+
+  test 'tag_in_internal_repository creates and updates tags in internal.git' do
+    authorize_with :active
+    gitint = "git --git-dir #{Rails.configuration.git_internal_dir}"
+    IO.read("|#{gitint} tag -d testtag 2>/dev/null") # "no such tag", fine
+    assert_match /^fatal: /, IO.read("|#{gitint} show testtag 2>&1")
+    refute $?.success?
+    Commit.tag_in_internal_repository 'active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', 'testtag'
+    assert_match /^commit 31ce37f/, IO.read("|#{gitint} show testtag")
+    assert $?.success?
+  end
+
+  # In active/shabranchnames, "7387838c69a21827834586cc42b467ff6c63293b" is
+  # both a commit hash and the name of a branch that starts at that same
+  # commit.
+  COMMIT_BRANCH_NAME = "7387838c69a21827834586cc42b467ff6c63293b"
+  # A commit that appears in the branch after 7387838c.
+  COMMIT_BRANCH_COMMIT_2 = "abec49829bf1758413509b7ffcab32a771b71e81"
+  # "738783" is another branch that starts from the above commit.
+  SHORT_COMMIT_BRANCH_NAME = COMMIT_BRANCH_NAME[0, 6]
+  # A commit that appears in branch 738783 after 7387838c.
+  SHORT_BRANCH_COMMIT_2 = "77e1a93093663705a63bb4d505698047e109dedd"
+
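+  # A rough git-level sketch of the disambiguation the tests below
+  # expect (illustrative only):
+  #
+  #   git rev-parse 7387838c69a2...        # the commit itself wins
+  #   git rev-parse heads/7387838c69a2...  # the branch tip wins
+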
+  test "find_commit_range min_version prefers commits over branch names" do
+    assert_equal([COMMIT_BRANCH_NAME],
+                 Commit.find_commit_range("active/shabranchnames",
+                                          COMMIT_BRANCH_NAME, nil, nil))
+  end
+
+  test "find_commit_range max_version prefers commits over branch names" do
+    assert_equal([COMMIT_BRANCH_NAME],
+                 Commit.find_commit_range("active/shabranchnames",
+                                          nil, COMMIT_BRANCH_NAME, nil))
+  end
+
+  test "find_commit_range min_version with short branch name" do
+    assert_equal([SHORT_BRANCH_COMMIT_2],
+                 Commit.find_commit_range("active/shabranchnames",
+                                          SHORT_COMMIT_BRANCH_NAME, nil, nil))
+  end
+
+  test "find_commit_range max_version with short branch name" do
+    assert_equal([SHORT_BRANCH_COMMIT_2],
+                 Commit.find_commit_range("active/shabranchnames",
+                                          nil, SHORT_COMMIT_BRANCH_NAME, nil))
+  end
+
+  test "find_commit_range min_version with disambiguated branch name" do
+    assert_equal([COMMIT_BRANCH_COMMIT_2],
+                 Commit.find_commit_range("active/shabranchnames",
+                                          "heads/#{COMMIT_BRANCH_NAME}",
+                                          nil, nil))
+  end
+
+  test "find_commit_range max_version with disambiguated branch name" do
+    assert_equal([COMMIT_BRANCH_COMMIT_2],
+                 Commit.find_commit_range("active/shabranchnames", nil,
+                                          "heads/#{COMMIT_BRANCH_NAME}", nil))
+  end
+
+  test "find_commit_range min_version with unambiguous short name" do
+    assert_equal([COMMIT_BRANCH_NAME],
+                 Commit.find_commit_range("active/shabranchnames",
+                                          COMMIT_BRANCH_NAME[0..-2], nil, nil))
+  end
+
+  test "find_commit_range max_version with unambiguous short name" do
+    assert_equal([COMMIT_BRANCH_NAME],
+                 Commit.find_commit_range("active/shabranchnames", nil,
+                                          COMMIT_BRANCH_NAME[0..-2], nil))
+  end
+
+  test "find_commit_range laundry list" do
+    authorize_with :active
+
+    # single
+    a = Commit.find_commit_range('active/foo', nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
+    assert_equal ['31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
+
+    #test "test_branch1" do
+    a = Commit.find_commit_range('active/foo', nil, 'master', nil)
+    assert_includes(a, '077ba2ad3ea24a929091a9e6ce545c93199b8e57')
+
+    #test "test_branch2" do
+    a = Commit.find_commit_range('active/foo', nil, 'b1', nil)
+    assert_equal ['1de84a854e2b440dc53bf42f8548afa4c17da332'], a
+
+    #test "test_branch3" do
+    a = Commit.find_commit_range('active/foo', nil, 'HEAD', nil)
+    assert_equal ['1de84a854e2b440dc53bf42f8548afa4c17da332'], a
+
+    #test "test_single_revision_repo" do
+    a = Commit.find_commit_range('active/foo', nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
+    assert_equal ['31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
+    a = Commit.find_commit_range('arvados', nil, '31ce37fe365b3dc204300a3e4c396ad333ed0556', nil)
+    assert_equal [], a
+
+    #test "test_multi_revision" do
+    # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
+    a = Commit.find_commit_range('active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', nil)
+    assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '4fe459abe02d9b365932b8f5dc419439ab4e2577', '31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
+
+    #test "test_tag" do
+    # complains "fatal: ambiguous argument 'tag1': unknown revision or path
+    # not in the working tree."
+    a = Commit.find_commit_range('active/foo', 'tag1', 'master', nil)
+    assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '4fe459abe02d9b365932b8f5dc419439ab4e2577'], a
+
+    #test "test_multi_revision_exclude" do
+    a = Commit.find_commit_range('active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', ['4fe459abe02d9b365932b8f5dc419439ab4e2577'])
+    assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
+
+    #test "test_multi_revision_tagged_exclude" do
+    # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
+    a = Commit.find_commit_range('active/foo', '31ce37fe365b3dc204300a3e4c396ad333ed0556', '077ba2ad3ea24a929091a9e6ce545c93199b8e57', ['tag1'])
+    assert_equal ['077ba2ad3ea24a929091a9e6ce545c93199b8e57', '31ce37fe365b3dc204300a3e4c396ad333ed0556'], a
+
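+    # None of the revision arguments may reach a shell unescaped; each
+    # case below plants shell metacharacters in one parameter and then
+    # checks that no file appeared as a side effect.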
+    Dir.mktmpdir do |touchdir|
+      # invalid input to maximum
+      a = Commit.find_commit_range('active/foo', nil, "31ce37fe365b3dc204300a3e4c396ad333ed0556 ; touch #{touchdir}/uh_oh", nil)
+      assert !File.exist?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'maximum' parameter of find_commit_range is exploitable"
+      assert_equal [], a
+
+      # invalid input to maximum
+      a = Commit.find_commit_range('active/foo', nil, "$(uname>#{touchdir}/uh_oh)", nil)
+      assert !File.exist?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'maximum' parameter of find_commit_range is exploitable"
+      assert_equal [], a
+
+      # invalid input to minimum
+      a = Commit.find_commit_range('active/foo', "31ce37fe365b3dc204300a3e4c396ad333ed0556 ; touch #{touchdir}/uh_oh", "31ce37fe365b3dc204300a3e4c396ad333ed0556", nil)
+      assert !File.exist?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'minimum' parameter of find_commit_range is exploitable"
+      assert_equal [], a
+
+      # invalid input to minimum
+      a = Commit.find_commit_range('active/foo', "$(uname>#{touchdir}/uh_oh)", "31ce37fe365b3dc204300a3e4c396ad333ed0556", nil)
+      assert !File.exist?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'minimum' parameter of find_commit_range is exploitable"
+      assert_equal [], a
+
+      # invalid input to 'excludes'
+      # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
+      a = Commit.find_commit_range('active/foo', "31ce37fe365b3dc204300a3e4c396ad333ed0556", "077ba2ad3ea24a929091a9e6ce545c93199b8e57", ["4fe459abe02d9b365932b8f5dc419439ab4e2577 ; touch #{touchdir}/uh_oh"])
+      assert !File.exist?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'excludes' parameter of find_commit_range is exploitable"
+      assert_equal [], a
+
+      # invalid input to 'excludes'
+      # complains "fatal: bad object 077ba2ad3ea24a929091a9e6ce545c93199b8e57"
+      a = Commit.find_commit_range('active/foo', "31ce37fe365b3dc204300a3e4c396ad333ed0556", "077ba2ad3ea24a929091a9e6ce545c93199b8e57", ["$(uname>#{touchdir}/uh_oh)"])
+      assert !File.exist?("#{touchdir}/uh_oh"), "#{touchdir}/uh_oh should not exist, 'excludes' parameter of find_commit_range is exploitable"
+      assert_equal [], a
+    end
+  end
 end
index 24bc2600103b2589c578bad481f05e0b0b309867..f16c8b2ec497e0caaaf5d78bdf5bb063b58badad 100644 (file)
@@ -15,7 +15,7 @@ class JobTest < ActiveSupport::TestCase
     {
       script: "hash",
       script_version: "master",
-      repository: "foo",
+      repository: "active/foo",
     }.merge(merge_me)
   end
 
@@ -78,6 +78,36 @@ class JobTest < ActiveSupport::TestCase
     assert(job.invalid?, "Job with bad Docker tag valid")
   end
 
+  [
+    false,
+    true
+  ].each do |use_config|
+    test "Job with no Docker image uses default docker image when configuration is set #{use_config}" do
+      default_docker_image = collections(:docker_image)[:portable_data_hash]
+      Rails.configuration.default_docker_image_for_jobs = default_docker_image if use_config
+
+      job = Job.new job_attrs
+      assert job.valid?, job.errors.full_messages.to_s
+
+      if use_config
+        refute_nil job.docker_image_locator
+        assert_equal default_docker_image, job.docker_image_locator
+      else
+        assert_nil job.docker_image_locator
+      end
+    end
+  end
+
+  test "create a job with a disambiguated script_version branch name" do
+    job = Job.
+      new(script: "testscript",
+          script_version: "heads/7387838c69a21827834586cc42b467ff6c63293b",
+          repository: "active/shabranchnames",
+          script_parameters: {})
+    assert(job.save)
+    assert_equal("abec49829bf1758413509b7ffcab32a771b71e81", job.script_version)
+  end
+
   test "locate a Docker image with a partial hash" do
     image_hash = links(:docker_image_collection_hash).name[0..24]
     job = Job.new job_attrs(runtime_constraints:
@@ -400,4 +430,15 @@ class JobTest < ActiveSupport::TestCase
     job = Job.create!(job_attrs(good_params))
     assert job.valid?
   end
+
+  test 'update job uuid tag in internal.git when version changes' do
+    authorize_with :active
+    j = jobs :queued
+    j.update_attributes repository: 'active/foo', script_version: 'b1'
+    assert_equal('1de84a854e2b440dc53bf42f8548afa4c17da332',
+                 internal_tag(j.uuid))
+    j.update_attributes repository: 'active/foo', script_version: 'master'
+    assert_equal('077ba2ad3ea24a929091a9e6ce545c93199b8e57',
+                 internal_tag(j.uuid))
+  end
 end
index 37e95db0e18cfb555de27a7b127d1d90cf132052..e5b88354fb128e1308c1a00a7c9e297928f191dd 100644 (file)
@@ -26,4 +26,103 @@ class NodeTest < ActiveSupport::TestCase
     assert_nil node.slot_number, "fixture is not what I expected"
     assert_equal 'down', node.crunch_worker_state, "wrong worker state"
   end
+
+  test "dns_server_conf_template" do
+    Rails.configuration.dns_server_conf_dir = Rails.root.join 'tmp'
+    Rails.configuration.dns_server_conf_template = Rails.root.join 'config', 'unbound.template'
+    conffile = Rails.root.join 'tmp', 'compute65535.conf'
+    File.unlink conffile rescue nil
+    assert Node.dns_server_update 'compute65535', '127.0.0.1'
+    assert_match /\"1\.0\.0\.127\.in-addr\.arpa\. IN PTR compute65535\.zzzzz\.arvadosapi\.com\"/, IO.read(conffile)
+    File.unlink conffile
+  end
+
+  test "dns_server_restart_command" do
+    Rails.configuration.dns_server_conf_dir = Rails.root.join 'tmp'
+    Rails.configuration.dns_server_reload_command = 'foobar'
+    restartfile = Rails.root.join 'tmp', 'restart.txt'
+    File.unlink restartfile rescue nil
+    assert Node.dns_server_update 'compute65535', '127.0.0.127'
+    assert_equal "foobar\n", IO.read(restartfile)
+    File.unlink restartfile
+  end
+
+  test "dns_server_restart_command fail" do
+    Rails.configuration.dns_server_conf_dir = Rails.root.join 'tmp', 'bogusdir'
+    Rails.configuration.dns_server_reload_command = 'foobar'
+    refute Node.dns_server_update 'compute65535', '127.0.0.127'
+  end
+
+  test "dns_server_update_command with valid command" do
+    testfile = Rails.root.join('tmp', 'node_test_dns_server_update_command.txt')
+    Rails.configuration.dns_server_update_command =
+      ('echo -n "%{hostname} == %{ip_address}" >' +
+       testfile.to_s.shellescape)
+    assert Node.dns_server_update 'compute65535', '127.0.0.1'
+    assert_equal 'compute65535 == 127.0.0.1', IO.read(testfile)
+    File.unlink testfile
+  end
+
+  test "dns_server_update_command with failing command" do
+    Rails.configuration.dns_server_update_command = 'false %{hostname}'
+    refute Node.dns_server_update 'compute65535', '127.0.0.1'
+  end
+
+  test "dns update with no commands/dirs configured" do
+    Rails.configuration.dns_server_update_command = false
+    Rails.configuration.dns_server_conf_dir = false
+    Rails.configuration.dns_server_conf_template = 'ignored!'
+    Rails.configuration.dns_server_reload_command = 'ignored!'
+    assert Node.dns_server_update 'compute65535', '127.0.0.127'
+  end
+
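+  # With neither an update command nor a conf dir configured,
+  # dns_server_update still reports success, so node pings don't fail
+  # just because DNS integration is switched off.
+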
+  test "ping new node with no hostname and default config" do
+    node = ping_node(:new_with_no_hostname, {})
+    slot_number = node.slot_number
+    refute_nil slot_number
+    assert_equal("compute#{slot_number}", node.hostname)
+  end
+
+  test "ping new node with no hostname and no config" do
+    Rails.configuration.assign_node_hostname = false
+    node = ping_node(:new_with_no_hostname, {})
+    refute_nil node.slot_number
+    assert_nil node.hostname
+  end
+
+  test "ping new node with zero padding config" do
+    Rails.configuration.assign_node_hostname = 'compute%<slot_number>04d'
+    node = ping_node(:new_with_no_hostname, {})
+    slot_number = node.slot_number
+    refute_nil slot_number
+    assert_equal("compute000#{slot_number}", node.hostname)
+  end
+
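+  # assign_node_hostname is an ordinary Ruby format string; a sketch of
+  # the expansion exercised above:
+  #
+  #   sprintf('compute%<slot_number>04d', slot_number: 9)  # => "compute0009"
+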
+  test "ping node with hostname and config and expect hostname unchanged" do
+    node = ping_node(:new_with_custom_hostname, {})
+    assert_equal(23, node.slot_number)
+    assert_equal("custom1", node.hostname)
+  end
+
+  test "ping node with hostname and no config and expect hostname unchanged" do
+    Rails.configuration.assign_node_hostname = false
+    node = ping_node(:new_with_custom_hostname, {})
+    assert_equal(23, node.slot_number)
+    assert_equal("custom1", node.hostname)
+  end
+
+  # Ping two nodes: one without a hostname and the other with a hostname.
+  # Verify that the first one gets a hostname and second one is unchanged.
+  test "ping two nodes one with no hostname and one with hostname and check hostnames" do
+    # ping node with no hostname and expect it set with config format
+    node = ping_node(:new_with_no_hostname, {})
+    slot_number = node.slot_number
+    refute_nil node.slot_number
+    assert_equal "compute#{slot_number}", node.hostname
+
+    # ping node with a hostname and expect it to be unchanged
+    node2 = ping_node(:new_with_custom_hostname, {})
+    refute_nil node2.slot_number
+    assert_equal "custom1", node2.hostname
+  end
 end
index 20cffdaaa743cee800ff484a8709ca18f30f097f..4a6ddc69fbcb1703c3234e4e6e1360a779b2f4b3 100644 (file)
@@ -353,18 +353,4 @@ class PermissionTest < ActiveSupport::TestCase
       ob.update_attributes!(owner_uuid: groups(:aproject).uuid)
     end
   end
-
-  test "active user cannot write admin's repo" do
-    set_user_from_auth :active
-    assert_raises ArvadosModel::PermissionDeniedError, "pwned" do
-      repositories(:repository3).update_attributes(name: "kilroy")
-    end
-  end
-
-  test "active user cannot change repo name via can_manage permission" do
-    set_user_from_auth :active
-    assert_raises ArvadosModel::PermissionDeniedError, "pwned" do
-      repositories(:foo).update_attributes(name: "arvados")
-    end
-  end
 end
index 327170c1089cccbf154d4e7c386b06f23af7b65a..288e1184fa2be2cd2ab955edeff6356f5d6e5cd3 100644 (file)
@@ -1,7 +1,279 @@
 require 'test_helper'
+require 'helpers/git_test_helper'
 
 class RepositoryTest < ActiveSupport::TestCase
-  # test "the truth" do
-  #   assert true
-  # end
+  include GitTestHelper
+
+  def new_repo(owner_key, attrs={})
+    set_user_from_auth owner_key
+    owner = users(owner_key)
+    Repository.new({owner_uuid: owner.uuid}.merge(attrs))
+  end
+
+  def changed_repo(repo_key, changes)
+    repo = repositories(repo_key)
+    changes.each_pair { |attr, value| repo.send("#{attr}=".to_sym, value) }
+    repo
+  end
+
+  def default_git_url(repo_name, user_name=nil)
+    if user_name
+      "git@git.%s.arvadosapi.com:%s/%s.git" %
+        [Rails.configuration.uuid_prefix, user_name, repo_name]
+    else
+      "git@git.%s.arvadosapi.com:%s.git" %
+        [Rails.configuration.uuid_prefix, repo_name]
+    end
+  end
+
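+  # e.g. default_git_url("foo", "active") yields
+  # "git@git.zzzzz.arvadosapi.com:active/foo.git" when the test
+  # cluster's uuid_prefix is "zzzzz".
+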
+  def assert_server_path(path_tail, repo_sym)
+    assert_equal(File.join(Rails.configuration.git_repositories_dir, path_tail),
+                 repositories(repo_sym).server_path)
+  end
+
+  ### name validation
+
+  {active: "active/", admin: "admin/", system_user: ""}.
+      each_pair do |user_sym, name_prefix|
+    %w(a aa a0 aA Aa AA A0).each do |name|
+      test "'#{name_prefix}#{name}' is a valid name for #{user_sym} repo" do
+        repo = new_repo(user_sym, name: name_prefix + name)
+        assert(repo.valid?)
+      end
+    end
+
+    test "name is required for #{user_sym} repo" do
+      refute(new_repo(user_sym).valid?)
+    end
+
+    test "repo name beginning with numeral is invalid for #{user_sym}" do
+      repo = new_repo(user_sym, name: "#{name_prefix}0a")
+      refute(repo.valid?)
+    end
+
+    "\\.-_/!@#$%^&*()[]{}".each_char do |bad_char|
+      test "name containing #{bad_char.inspect} is invalid for #{user_sym}" do
+        repo = new_repo(user_sym, name: "#{name_prefix}bad#{bad_char}reponame")
+        refute(repo.valid?)
+      end
+    end
+  end
+
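+  # The cases above imply a repository's base name matches roughly
+  # /\A[A-Za-z][A-Za-z0-9]*\z/, prefixed with "<username>/" for
+  # user-owned repos -- inferred from the tests, not the validator.
+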
+  test "admin can create valid repo for other user with correct name prefix" do
+    owner = users(:active)
+    repo = new_repo(:admin, name: "#{owner.username}/validnametest",
+                    owner_uuid: owner.uuid)
+    assert(repo.valid?)
+  end
+
+  test "admin can create valid system repo without name prefix" do
+    repo = new_repo(:admin, name: "validnametest",
+                    owner_uuid: users(:system_user).uuid)
+    assert(repo.valid?)
+  end
+
+  test "repo name prefix must match owner_uuid username" do
+    repo = new_repo(:admin, name: "admin/badusernametest",
+                    owner_uuid: users(:active).uuid)
+    refute(repo.valid?)
+  end
+
+  test "repo name prefix must be empty for system repo" do
+    repo = new_repo(:admin, name: "root/badprefixtest",
+                    owner_uuid: users(:system_user).uuid)
+    refute(repo.valid?)
+  end
+
+  ### owner validation
+
+  test "name must be unique per user" do
+    repo = new_repo(:active, name: repositories(:foo).name)
+    refute(repo.valid?)
+  end
+
+  test "name can be duplicated across users" do
+    repo = new_repo(:active, name: "active/#{repositories(:arvados).name}")
+    assert(repo.valid?)
+  end
+
+  test "repository cannot be owned by a group" do
+    set_user_from_auth :active
+    repo = Repository.new(owner_uuid: groups(:all_users).uuid,
+                          name: "ownedbygroup")
+    refute(repo.valid?)
+    refute_empty(repo.errors[:owner_uuid] || [])
+  end
+
+  ### URL generation
+
+  test "fetch_url" do
+    repo = new_repo(:active, name: "active/fetchtest")
+    repo.save
+    assert_equal(default_git_url("fetchtest", "active"), repo.fetch_url)
+  end
+
+  test "fetch_url owned by system user" do
+    set_user_from_auth :admin
+    repo = Repository.new(owner_uuid: users(:system_user).uuid,
+                          name: "fetchtest")
+    repo.save
+    assert_equal(default_git_url("fetchtest"), repo.fetch_url)
+  end
+
+  test "push_url" do
+    repo = new_repo(:active, name: "active/pushtest")
+    repo.save
+    assert_equal(default_git_url("pushtest", "active"), repo.push_url)
+  end
+
+  test "push_url owned by system user" do
+    set_user_from_auth :admin
+    repo = Repository.new(owner_uuid: users(:system_user).uuid,
+                          name: "pushtest")
+    repo.save
+    assert_equal(default_git_url("pushtest"), repo.push_url)
+  end
+
+  ### Path generation
+
+  test "disk path stored by UUID" do
+    assert_server_path("zzzzz-s0uqq-382brsig8rp3666/.git", :foo)
+  end
+
+  test "disk path stored by name" do
+    assert_server_path("arvados/.git", :arvados)
+  end
+
+  test "disk path for repository not on disk" do
+    assert_nil(Repository.new.server_path)
+  end
+
+  ### Repository creation
+
+  test "non-admin can create a repository for themselves" do
+    repo = new_repo(:active, name: "active/newtestrepo")
+    assert(repo.save)
+  end
+
+  test "non-admin can't create a repository for another visible user" do
+    repo = new_repo(:active, name: "repoforanon",
+                    owner_uuid: users(:anonymous).uuid)
+    assert_not_allowed { repo.save }
+  end
+
+  test "admin can create a repository for themselves" do
+    repo = new_repo(:admin, name: "admin/newtestrepo")
+    assert(repo.save)
+  end
+
+  test "admin can create a repository for others" do
+    repo = new_repo(:admin, name: "active/repoforactive",
+                    owner_uuid: users(:active).uuid)
+    assert(repo.save)
+  end
+
+  test "admin can create a system repository" do
+    repo = new_repo(:admin, name: "repoforsystem",
+                    owner_uuid: users(:system_user).uuid)
+    assert(repo.save)
+  end
+
+  ### Repository destruction
+
+  test "non-admin can destroy their own repository" do
+    set_user_from_auth :active
+    assert(repositories(:foo).destroy)
+  end
+
+  test "non-admin can't destroy others' repository" do
+    set_user_from_auth :active
+    assert_not_allowed { repositories(:repository3).destroy }
+  end
+
+  test "non-admin can't destroy system repository" do
+    set_user_from_auth :active
+    assert_not_allowed { repositories(:arvados).destroy }
+  end
+
+  test "admin can destroy their own repository" do
+    set_user_from_auth :admin
+    assert(repositories(:repository3).destroy)
+  end
+
+  test "admin can destroy others' repository" do
+    set_user_from_auth :admin
+    assert(repositories(:foo).destroy)
+  end
+
+  test "admin can destroy system repository" do
+    set_user_from_auth :admin
+    assert(repositories(:arvados).destroy)
+  end
+
+  ### Changing ownership
+
+  test "non-admin can't make their repository a system repository" do
+    set_user_from_auth :active
+    repo = changed_repo(:foo, owner_uuid: users(:system_user).uuid)
+    assert_not_allowed { repo.save }
+  end
+
+  test "admin can give their repository to someone else" do
+    set_user_from_auth :admin
+    repo = changed_repo(:repository3, owner_uuid: users(:active).uuid,
+                        name: "active/foo3")
+    assert(repo.save)
+  end
+
+  test "admin can make their repository a system repository" do
+    set_user_from_auth :admin
+    repo = changed_repo(:repository3, owner_uuid: users(:system_user).uuid,
+                        name: "foo3")
+    assert(repo.save)
+  end
+
+  test 'write permission allows changing modified_at' do
+    act_as_user users(:active) do
+      r = repositories(:foo)
+      modtime_was = r.modified_at
+      r.modified_at = Time.now
+      assert r.save
+      assert_operator modtime_was, :<, r.modified_at
+    end
+  end
+
+  test 'write permission necessary for changing modified_at' do
+    act_as_user users(:spectator) do
+      r = repositories(:foo)
+      modtime_was = r.modified_at
+      r.modified_at = Time.now
+      assert_raises ArvadosModel::PermissionDeniedError do
+        r.save!
+      end
+      r.reload
+      assert_equal modtime_was, r.modified_at
+    end
+  end
+
+  ### Renaming
+
+  test "non-admin can rename own repo" do
+    act_as_user users(:active) do
+      assert repositories(:foo).update_attributes(name: 'active/foo12345')
+    end
+  end
+
+  test "top level repo can be touched by non-admin with can_manage" do
+    add_permission_link users(:active), repositories(:arvados), 'can_manage'
+    act_as_user users(:active) do
+      assert changed_repo(:arvados, modified_at: Time.now).save
+    end
+  end
+
+  test "top level repo cannot be renamed by non-admin with can_manage" do
+    add_permission_link users(:active), repositories(:arvados), 'can_manage'
+    act_as_user users(:active) do
+      assert_not_allowed { changed_repo(:arvados, name: 'xarvados').save }
+    end
+  end
 end
diff --git a/services/api/test/unit/salvage_collection_test.rb b/services/api/test/unit/salvage_collection_test.rb
new file mode 100644 (file)
index 0000000..a269078
--- /dev/null
@@ -0,0 +1,165 @@
+require 'test_helper'
+require 'salvage_collection'
+require 'shellwords'
+
+# Valid manifest_text
+TEST_MANIFEST = ". 341dabea2bd78ad0d6fc3f5b926b450e+85626+Ad391622a17f61e4a254eda85d1ca751c4f368da9@55e076ce 0:85626:brca2-hg19.fa\n. d7321a918923627c972d8f8080c07d29+82570+A22e0a1d9b9bc85c848379d98bedc64238b0b1532@55e076ce 0:82570:brca1-hg19.fa\n"
+TEST_MANIFEST_STRIPPED = ". 341dabea2bd78ad0d6fc3f5b926b450e+85626 0:85626:brca2-hg19.fa\n. d7321a918923627c972d8f8080c07d29+82570 0:82570:brca1-hg19.fa\n"
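+# ("Stripped" means the +A...@<timestamp> permission-hint suffixes are
+# removed, leaving bare "<md5>+<size>" locators.)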
+
+# This invalid manifest_text has the following flaws:
+#   Missing stream name with locator in its place
+#   Invalid locators:
+#     faafaafaabd78ad0d6fc3f5b926b450e+foo
+#     bar-baabaabaabd78ad0d6fc3f5b926b450e
+#     bad12345dae58ad0d6fc3f5b926b450e+
+#     341dabea2bd78ad0d6fc3f5b926b450e+abc
+#     341dabea2bd78ad0d6fc3f5b926abcdf
+# Expectation: All these locators are preserved in salvaged_data
+BAD_MANIFEST = "faafaafaabd78ad0d6fc3f5b926b450e+foo bar-baabaabaabd78ad0d6fc3f5b926b450e_bad12345dae58ad0d6fc3f5b926b450e+ 341dabea2bd78ad0d6fc3f5b926b450e+abc 341dabea2bd78ad0d6fc3f5b926abcdf 0:85626:brca2-hg19.fa\n. abcdabea2bd78ad0d6fc3f5b926b450e+1000 0:1000:brca-hg19.fa\n. d7321a918923627c972d8f8080c07d29+2000+A22e0a1d9b9bc85c848379d98bedc64238b0b1532@55e076ce 0:2000:brca1-hg19.fa\n"
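+
+# For reference, a locator salvaging treats as usable is 32 hex digits
+# plus a "+<size>" hint, optionally followed by further "+hint" fields
+# -- roughly (a sketch, not the actual parser):
+#
+#   /\A[0-9a-f]{32}(\+\d+)(\+\S+)*\z/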
+
+class SalvageCollectionTest < ActiveSupport::TestCase
+  include SalvageCollection
+
+  setup do
+    set_user_from_auth :admin
+    # arv-put needs ARV env variables
+    ENV['ARVADOS_API_HOST'] = 'unused_by_test'
+    ENV['ARVADOS_API_TOKEN'] = 'unused_by_test'
+    @backtick_mock_failure = false
+  end
+
+  teardown do
+    ENV['ARVADOS_API_HOST'] = ''
+    ENV['ARVADOS_API_TOKEN'] = ''
+  end
+
+  def ` cmd # mock Kernel#` so the test never runs a real arv-put
+    assert_equal 'arv-put', cmd.shellsplit[0]
+    if @backtick_mock_failure
+      # run a process so $? indicates failure
+      return super 'false'
+    end
+    # run a process so $? indicates success
+    super 'true'
+    file_contents = File.read(cmd.shellsplit[-1])
+    ". " +
+      Digest::MD5.hexdigest(file_contents) + "+" + file_contents.length.to_s +
+      " 0:" + file_contents.length.to_s + ":invalid_manifest_text.txt\n"
+  end
+
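+  # With this mock in place, salvage_collection's arv-put call returns
+  # a one-file manifest for whatever file it was asked to upload,
+  # without contacting a real API server.
+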
+  test "salvage test collection with valid manifest text" do
+    # create a collection to test salvaging
+    src_collection = Collection.new name: "test collection", manifest_text: TEST_MANIFEST
+    src_collection.save!
+
+    # salvage this collection
+    salvage_collection src_collection.uuid, 'test salvage collection - see #6277, #6859'
+
+    # verify the updated src_collection data
+    updated_src_collection = Collection.find_by_uuid src_collection.uuid
+    updated_name = updated_src_collection.name
+    assert updated_name.include?(src_collection.name)
+
+    match = updated_name.match /^test collection.*salvaged data at (.*)\)$/
+    assert_not_nil match
+    assert_not_nil match[1]
+    assert_empty updated_src_collection.manifest_text
+
+    # match[1] is the uuid of the new collection created from src_collection's salvaged data
+    # use this to get the new collection and verify
+    new_collection = Collection.find_by_uuid match[1]
+    match = new_collection.name.match /^salvaged from (.*),.*/
+    assert_not_nil match
+    assert_equal src_collection.uuid, match[1]
+
+    # verify the new collection's manifest format
+    expected_manifest = ". " + Digest::MD5.hexdigest(TEST_MANIFEST_STRIPPED) + "+" +
+      TEST_MANIFEST_STRIPPED.length.to_s + " 0:" + TEST_MANIFEST_STRIPPED.length.to_s +
+      ":invalid_manifest_text.txt\n. 341dabea2bd78ad0d6fc3f5b926b450e+85626 d7321a918923627c972d8f8080c07d29+82570 0:168196:salvaged_data\n"
+    assert_equal expected_manifest, new_collection.manifest_text
+  end
+
+  test "salvage collection with no uuid required argument" do
+    e = assert_raises RuntimeError do
+      salvage_collection nil
+    end
+  end
+
+  test "salvage collection with bogus uuid" do
+    e = assert_raises RuntimeError do
+      salvage_collection 'bogus-uuid'
+    end
+    assert_equal "No collection found for bogus-uuid.", e.message
+  end
+
+  test "salvage collection with no env ARVADOS_API_HOST" do
+    e = assert_raises RuntimeError do
+      ENV['ARVADOS_API_HOST'] = ''
+      ENV['ARVADOS_API_TOKEN'] = ''
+      salvage_collection collections('user_agreement').uuid
+    end
+    assert_equal "ARVADOS environment variables missing. Please set your admin user credentials as ARVADOS environment variables.", e.message
+  end
+
+  test "salvage collection with error during arv-put" do
+    # try to salvage collection while mimicking error during arv-put
+    @backtick_mock_failure = true
+    e = assert_raises RuntimeError do
+      salvage_collection collections('user_agreement').uuid
+    end
+    assert_match /Error during arv-put: pid \d+ exit \d+ \(cmd was \"arv-put .*\"\)/, e.message
+  end
+
+  # This test uses BAD_MANIFEST, which has the following flaws:
+  #   Missing stream name with locator in its place
+  #   Invalid locators:
+  #     faafaafaabd78ad0d6fc3f5b926b450e+foo
+  #     bar-baabaabaabd78ad0d6fc3f5b926b450e
+  #     bad12345dae58ad0d6fc3f5b926b450e+
+  #     341dabea2bd78ad0d6fc3f5b926b450e+abc
+  #     341dabea2bd78ad0d6fc3f5b926abcdf
+  # Expectation: All these locators are preserved in salvaged_data
+  test "invalid locators preserved during salvaging" do
+    locator_data = salvage_collection_locator_data BAD_MANIFEST
+    assert_equal \
+    ["faafaafaabd78ad0d6fc3f5b926b450e",
+     "baabaabaabd78ad0d6fc3f5b926b450e",
+     "bad12345dae58ad0d6fc3f5b926b450e",
+     "341dabea2bd78ad0d6fc3f5b926b450e",
+     "341dabea2bd78ad0d6fc3f5b926abcdf",
+     "abcdabea2bd78ad0d6fc3f5b926b450e+1000",
+     "d7321a918923627c972d8f8080c07d29+2000",
+    ], locator_data[0]
+    assert_equal 1000+2000, locator_data[1]
+  end
+
+  test "salvage a collection with invalid manifest text" do
+    # create a collection to test salvaging
+    src_collection = Collection.new name: "test collection", manifest_text: BAD_MANIFEST, owner_uuid: 'zzzzz-tpzed-000000000000000'
+    src_collection.save!(validate: false)
+
+    # salvage this collection
+    salvage_collection src_collection.uuid, 'test salvage collection - see #6277, #6859'
+
+    # verify the updated src_collection data
+    updated_src_collection = Collection.find_by_uuid src_collection.uuid
+    updated_name = updated_src_collection.name
+    assert updated_name.include?(src_collection.name)
+
+    match = updated_name.match /^test collection.*salvaged data at (.*)\)$/
+    assert_not_nil match
+    assert_not_nil match[1]
+    assert_empty updated_src_collection.manifest_text
+
+    # match[1] is the uuid of the new collection created from src_collection's salvaged data
+    # use this to get the new collection and verify
+    new_collection = Collection.find_by_uuid match[1]
+    match = new_collection.name.match /^salvaged from (.*),.*/
+    assert_not_nil match
+    assert_equal src_collection.uuid, match[1]
+    # verify the new collection's manifest includes the bad locators
+    expected_manifest = ". " + Digest::MD5.hexdigest(BAD_MANIFEST) + "+" + BAD_MANIFEST.length.to_s +
+      " 0:" + BAD_MANIFEST.length.to_s + ":invalid_manifest_text.txt\n. faafaafaabd78ad0d6fc3f5b926b450e baabaabaabd78ad0d6fc3f5b926b450e bad12345dae58ad0d6fc3f5b926b450e 341dabea2bd78ad0d6fc3f5b926b450e 341dabea2bd78ad0d6fc3f5b926abcdf abcdabea2bd78ad0d6fc3f5b926b450e+1000 d7321a918923627c972d8f8080c07d29+2000 0:3000:salvaged_data\n"
+    assert_equal expected_manifest, new_collection.manifest_text
+  end
+end
index b280ae7f08c24ff322b57c347f0a4f7f8843d89f..8629f2951ac2652747051e03b38332df0031ae98 100644 (file)
@@ -12,11 +12,9 @@ class UserNotifierTest < ActionMailer::TestCase
     # Test the body of the sent email contains what we expect it to
     assert_equal Rails.configuration.user_notifier_email_from, email.from.first
     assert_equal user.email, email.to.first
-    assert_equal 'Welcome to Curoverse', email.subject
-    assert (email.body.to_s.include? 'Your Arvados account has been set up'),
-        'Expected Your Arvados account has been set up in email body'
-    assert (email.body.to_s.include? user.email),
-        'Expected user email in email body'
+    assert_equal 'Welcome to Curoverse - shell account enabled', email.subject
+    assert (email.body.to_s.include? 'Your Arvados shell account has been set up'),
+        'Expected Your Arvados shell account has been set up in email body'
     assert (email.body.to_s.include? Rails.configuration.workbench_address),
         'Expected workbench url in email body'
   end
index 9bcb0116fba1628bfda8808a7cfbe408da9dc32e..b96645ce263d2cb97b6028693b88e2334df51819 100644 (file)
@@ -9,6 +9,137 @@ class UserTest < ActiveSupport::TestCase
     system_user
   end
 
+  %w(a aa a0 aA Aa AA A0).each do |username|
+    test "#{username.inspect} is a valid username" do
+      user = User.new(username: username)
+      assert(user.valid?)
+    end
+  end
+
+  test "username is not required" do
+    user = User.new(username: nil)
+    assert(user.valid?)
+  end
+
+  test "username beginning with numeral is invalid" do
+    user = User.new(username: "0a")
+    refute(user.valid?)
+  end
+
+  "\\.-_/!@#$%^&*()[]{}".each_char do |bad_char|
+    test "username containing #{bad_char.inspect} is invalid" do
+      user = User.new(username: "bad#{bad_char}username")
+      refute(user.valid?)
+    end
+  end
+
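+  # Together these cases imply usernames match roughly
+  # /\A[A-Za-z][A-Za-z0-9]*\z/ (a letter followed by letters and
+  # digits) -- inferred from the tests, not the model's validator.
+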
+  test "username must be unique" do
+    user = User.new(username: users(:active).username)
+    refute(user.valid?)
+  end
+
+  test "non-admin can't update username" do
+    set_user_from_auth :rominiadmin
+    user = User.find_by_uuid(users(:rominiadmin).uuid)
+    user.username = "selfupdate"
+    assert_not_allowed { user.save }
+  end
+
+  def check_admin_username_change(fixture_name)
+    set_user_from_auth :admin_trustedclient
+    user = User.find_by_uuid(users(fixture_name).uuid)
+    user.username = "newnamefromtest"
+    assert(user.save)
+  end
+
+  test "admin can set username" do
+    check_admin_username_change(:active_no_prefs)
+  end
+
+  test "admin can update username" do
+    check_admin_username_change(:active)
+  end
+
+  test "admin can update own username" do
+    check_admin_username_change(:admin)
+  end
+
+  def check_new_username_setting(email_name, expect_name)
+    set_user_from_auth :admin
+    user = User.create!(email: "#{email_name}@example.org")
+    assert_equal(expect_name, user.username)
+  end
+
+  test "new username set from e-mail" do
+    check_new_username_setting("dakota", "dakota")
+  end
+
+  test "new username set from e-mail with leading digits" do
+    check_new_username_setting("1dakota9", "dakota9")
+  end
+
+  test "new username set from e-mail with punctuation" do
+    check_new_username_setting("dakota.9", "dakota9")
+  end
+
+  test "new username set from e-mail with leading digits and punctuation" do
+    check_new_username_setting("1.dakota.z", "dakotaz")
+  end
+
+  test "new username set from e-mail with extra part" do
+    check_new_username_setting("dakota+arvados", "dakota")
+  end
+
+  test "new username set with deduplication" do
+    name = users(:active).username
+    check_new_username_setting(name, "#{name}2")
+  end
+
+  test "new username set avoiding blacklist" do
+    Rails.configuration.auto_setup_name_blacklist = ["root"]
+    check_new_username_setting("root", "root2")
+  end
+
+  test "no username set when no base available" do
+    check_new_username_setting("_", nil)
+  end
+
+  test "updating username updates repository names" do
+    set_user_from_auth :admin
+    user = users(:active)
+    user.username = "newtestname"
+    assert(user.save, "username update failed")
+    {foo: "newtestname/foo", repository2: "newtestname/foo2"}.
+        each_pair do |repo_sym, expect_name|
+      assert_equal(expect_name, repositories(repo_sym).name)
+    end
+  end
+
+  test "admin can clear username when user owns no repositories" do
+    set_user_from_auth :admin
+    user = users(:spectator)
+    user.username = nil
+    assert(user.save)
+    assert_nil(user.username)
+  end
+
+  test "admin can't clear username when user owns repositories" do
+    set_user_from_auth :admin
+    user = users(:active)
+    start_username = user.username
+    user.username = nil
+    assert_not_allowed { user.save }
+    refute_empty(user.errors[:username])
+  end
+
+  test "failed username update doesn't change repository names" do
+    set_user_from_auth :admin
+    user = users(:active)
+    user.username = users(:fuse).username
+    assert_not_allowed { user.save }
+    assert_equal("active/foo", repositories(:foo).name)
+  end
+
   [[false, 'foo@example.com', true, nil],
    [false, 'bar@example.com', nil, true],
    [true, 'foo@example.com', true, nil],
@@ -195,106 +326,60 @@ class UserTest < ActiveSupport::TestCase
   test "create new user with notifications" do
     set_user_from_auth :admin
 
-    create_user_and_verify_setup_and_notifications true, 'active-notify-address@example.com', 'inactive-notify-address@example.com', nil, false
-    create_user_and_verify_setup_and_notifications true, 'active-notify-address@example.com', [], nil, false
-    create_user_and_verify_setup_and_notifications true, [], [], nil, false
-    create_user_and_verify_setup_and_notifications false, 'active-notify-address@example.com', 'inactive-notify-address@example.com', nil, false
-    create_user_and_verify_setup_and_notifications false, [], 'inactive-notify-address@example.com', nil, false
-    create_user_and_verify_setup_and_notifications false, [], [], nil, false
+    create_user_and_verify_setup_and_notifications true, 'active-notify-address@example.com', 'inactive-notify-address@example.com', nil, nil
+    create_user_and_verify_setup_and_notifications true, 'active-notify-address@example.com', [], nil, nil
+    create_user_and_verify_setup_and_notifications true, [], [], nil, nil
+    create_user_and_verify_setup_and_notifications false, 'active-notify-address@example.com', 'inactive-notify-address@example.com', nil, nil
+    create_user_and_verify_setup_and_notifications false, [], 'inactive-notify-address@example.com', nil, nil
+    create_user_and_verify_setup_and_notifications false, [], [], nil, nil
   end
 
   [
-    [false, [], [], 'inactive-none@example.com', false, false, true],
-    [false, [], [], 'inactive-vm@example.com', true, false, true],
-    [false, [], [], 'inactive-repo@example.com', false, true, true],
-    [false, [], [], 'inactive-both@example.com', true, true, true],
-
-    [true, 'active-notify@example.com', 'inactive-notify@example.com', 'active-none@example.com', false, false, true],
-    [true, 'active-notify@example.com', 'inactive-notify@example.com', 'active-vm@example.com', true, false, true],
-    [true, 'active-notify@example.com', 'inactive-notify@example.com', 'active-repo@example.com', false, true, true],
-    [true, 'active-notify@example.com', 'inactive-notify@example.com', 'active-both@example.com', true, true, true],
-
-    [false, [], [], nil, true, true, false],
-
-    [false, [], [], 'arvados', true, true, false],
-    [false, [], [], 'arvados', true, false, false],   # blacklisted username
-    [false, [], [], 'arvados', false, false, true],   # since we are not creating repo and vm login, this blacklisted name is not a problem
-
-    [false, [], [], 'arvados@example.com', false, false, true],   # since we are not creating repo and vm login, this blacklisted name is not a problem
-    [true, 'active-notify@example.com', 'inactive-notify@example.com', 'arvados@example.com', false, false, true],   # since we are not creating repo and vm login, this blacklisted name is not a problem
-    [true, 'active-notify@example.com', 'inactive-notify@example.com', 'root@example.com', true, false, false], # blacklisted name
-    [false, 'active-notify@example.com', 'inactive-notify@example.com', 'root@example.com', true, false, false], # blacklisted name
-    [true, 'active-notify@example.com', 'inactive-notify@example.com', 'roo_t@example.com', false, true, true], # not blacklisted name
-
-    [false, [], [], '@example.com', true, false, false],  # incorrect format
-    [false, [], [], '@example.com', false, true, false],
-    [false, [], [], '@example.com', false, false, true],  # no repo and vm login, so no issue with email format
-
-    [false, [], [], '^^incorrect_format@example.com', true, true, false],
-
-    [false, 'active-notify@example.com', 'inactive-notify@example.com', 'auto_setup_repo@example.com', true, true, true],  # existing repository name 'auto_setup_repo'
-    [true, 'active-notify@example.com', 'inactive-notify@example.com', 'auto_setup_repo@example.com', true, false, true],  # existing repository name 'auto_setup_repo'
-    [false, 'active-notify@example.com', 'inactive-notify@example.com', 'auto_setup_repo@example.com', false, true, true],  # existing repository name 'auto_setup_repo'
-    [false, 'active-notify@example.com', 'inactive-notify@example.com', 'auto_setup_repo@example.com', false, false, true],  # existing repository name 'auto_setup_repo', but we are not creating repo or login link
-
-    [false, 'active-notify@example.com', 'inactive-notify@example.com', 'auto_setup_vm_login@example.com', true, true, true], # existing vm login name
-    [true, 'active-notify@example.com', 'inactive-notify@example.com', 'auto_setup_vm_login@example.com', true, false, true], # existing vm login name
-    [false, 'active-notify@example.com', 'inactive-notify@example.com', 'auto_setup_vm_login@example.com', false, true, true], # existing vm login name
-    [false, 'active-notify@example.com', 'inactive-notify@example.com', 'auto_setup_vm_login@example.com', false, false, true], # existing vm login name, but we are not creating repo or login link
-
-    [true, 'active-notify@example.com', 'inactive-notify@example.com', '*!*@example.com', true, false, false], # username is invalid format
-    [false, 'active-notify@example.com', 'inactive-notify@example.com', '*!*@example.com', false, false, true], # since no repo and vm login, username is ok (not validated)
-    [true, 'active-notify@example.com', 'inactive-notify@example.com', '*!*@example.com', false, false, true], # since no repo and vm login, username is ok (not validated)
-
-    [true, 'active-notify@example.com', 'inactive-notify@example.com', '&4ad@example.com', true, true, false], # username is invalid format
-    [true, 'active-notify@example.com', 'inactive-notify@example.com', '&4ad@example.com', false, false, true], # no repo or vm login, so format not checked
-    [false, 'active-notify@example.com', 'inactive-notify@example.com', '&4ad@example.com', true, true, false], # username is invalid format
-    [false, 'active-notify@example.com', 'inactive-notify@example.com', '&4ad@example.com', false, false, true], # no repo or vm login, so format not checked
-
-    [true, 'active-notify@example.com', 'inactive-notify@example.com', '4ad@example.com', true, true, false], # username is invalid format
-    [true, 'active-notify@example.com', 'inactive-notify@example.com', '4ad@example.com', false, false, true], # no repo or vm login, so format not checked
-    [false, 'active-notify@example.com', 'inactive-notify@example.com', '4ad@example.com', false, false, true], # no repo or vm login, so format not checked
-
-    [true, 'active-notify@example.com', 'inactive-notify@example.com', '.foo@example.com', false, false, true], # no repo or vm login, so format not checked
-    [true, 'active-notify@example.com', 'inactive-notify@example.com', '.foo@example.com', true, false, false], # invalid format
-
-    [true, 'active-notify@example.com', 'inactive-notify@example.com', 'bar.@example.com', false, false, true], # no repo or vm login, so format not checked
-    [true, 'active-notify@example.com', 'inactive-notify@example.com', 'bar.@example.com', true, false, false], # valid format
-
-    [true, 'active-notify@example.com', 'inactive-notify@example.com', 'ice9@example.com', false, false, true], # no repo or vm login, so format not checked
-    [true, 'active-notify@example.com', 'inactive-notify@example.com', 'ice9@example.com', true, false, true], # valid format
-
-    [true, 'active-notify@example.com', 'inactive-notify@example.com', 'o_o@example.com', false, false, true], # no repo or vm login, so format not checked
-    [true, 'active-notify@example.com', 'inactive-notify@example.com', 'o_o@example.com', true, false, true], # valid format
-
-    [true, 'active-notify@example.com', 'inactive-notify@example.com', 'r00t@example.com', false, false, true], # no repo or vm login, so format not checked
-    [true, 'active-notify@example.com', 'inactive-notify@example.com', 'r00t@example.com', true, false, true], # valid format
-
-  ].each do |active, new_user_recipients, inactive_recipients, email, auto_setup_vm, auto_setup_repo, ok_to_auto_setup|
+    # Easy inactive user tests.
+    [false, [], [], "inactive-none@example.com", false, false, "inactivenone"],
+    [false, [], [], "inactive-vm@example.com", true, false, "inactivevm"],
+    [false, [], [], "inactive-repo@example.com", false, true, "inactiverepo"],
+    [false, [], [], "inactive-both@example.com", true, true, "inactiveboth"],
+
+    # Easy active user tests.
+    [true, "active-notify@example.com", "inactive-notify@example.com", "active-none@example.com", false, false, "activenone"],
+    [true, "active-notify@example.com", "inactive-notify@example.com", "active-vm@example.com", true, false, "activevm"],
+    [true, "active-notify@example.com", "inactive-notify@example.com", "active-repo@example.com", false, true, "activerepo"],
+    [true, "active-notify@example.com", "inactive-notify@example.com", "active-both@example.com", true, true, "activeboth"],
+
+    # Test users with malformed e-mail addresses.
+    [false, [], [], nil, true, true, nil],
+    [false, [], [], "arvados", true, true, nil],
+    [false, [], [], "@example.com", true, true, nil],
+    [true, "active-notify@example.com", "inactive-notify@example.com", "*!*@example.com", true, false, nil],
+    [true, "active-notify@example.com", "inactive-notify@example.com", "*!*@example.com", false, false, nil],
+
+    # Test users with various username transformations.
+    [false, [], [], "arvados@example.com", false, false, "arvados2"],
+    [true, "active-notify@example.com", "inactive-notify@example.com", "arvados@example.com", false, false, "arvados2"],
+    [true, "active-notify@example.com", "inactive-notify@example.com", "root@example.com", true, false, "root2"],
+    [false, "active-notify@example.com", "inactive-notify@example.com", "root@example.com", true, false, "root2"],
+    [true, "active-notify@example.com", "inactive-notify@example.com", "roo_t@example.com", false, true, "root2"],
+    [false, [], [], "^^incorrect_format@example.com", true, true, "incorrectformat"],
+    [true, "active-notify@example.com", "inactive-notify@example.com", "&4a_d9.@example.com", true, true, "ad9"],
+    [true, "active-notify@example.com", "inactive-notify@example.com", "&4a_d9.@example.com", false, false, "ad9"],
+    [false, "active-notify@example.com", "inactive-notify@example.com", "&4a_d9.@example.com", true, true, "ad9"],
+    [false, "active-notify@example.com", "inactive-notify@example.com", "&4a_d9.@example.com", false, false, "ad9"],
+  ].each do |active, new_user_recipients, inactive_recipients, email, auto_setup_vm, auto_setup_repo, expect_username|
     test "create new user with auto setup #{active} #{email} #{auto_setup_vm} #{auto_setup_repo}" do
-      auto_setup_new_users = Rails.configuration.auto_setup_new_users
-      auto_setup_new_users_with_vm_uuid = Rails.configuration.auto_setup_new_users_with_vm_uuid
-      auto_setup_new_users_with_repository = Rails.configuration.auto_setup_new_users_with_repository
-
-      begin
-        set_user_from_auth :admin
+      set_user_from_auth :admin
 
-        Rails.configuration.auto_setup_new_users = true
+      Rails.configuration.auto_setup_new_users = true
 
-        if auto_setup_vm
-          Rails.configuration.auto_setup_new_users_with_vm_uuid = virtual_machines(:testvm)['uuid']
-        else
-          Rails.configuration.auto_setup_new_users_with_vm_uuid = false
-        end
+      if auto_setup_vm
+        Rails.configuration.auto_setup_new_users_with_vm_uuid = virtual_machines(:testvm)['uuid']
+      else
+        Rails.configuration.auto_setup_new_users_with_vm_uuid = false
+      end
 
-        Rails.configuration.auto_setup_new_users_with_repository = auto_setup_repo
+      Rails.configuration.auto_setup_new_users_with_repository = auto_setup_repo
 
-        create_user_and_verify_setup_and_notifications active, new_user_recipients, inactive_recipients, email, ok_to_auto_setup
-      ensure
-        Rails.configuration.auto_setup_new_users = auto_setup_new_users
-        Rails.configuration.auto_setup_new_users_with_vm_uuid = auto_setup_new_users_with_vm_uuid
-        Rails.configuration.auto_setup_new_users_with_repository = auto_setup_new_users_with_repository
-      end
+      create_user_and_verify_setup_and_notifications active, new_user_recipients, inactive_recipients, email, expect_username
     end
   end
 
@@ -338,14 +423,7 @@ class UserTest < ActiveSupport::TestCase
 
   test "create new user as non-admin user" do
     set_user_from_auth :active
-
-    begin
-      user = User.new
-      user.save
-    rescue ArvadosModel::PermissionDeniedError => e
-    end
-    assert (e.message.include? 'PermissionDeniedError'),
-        'Expected PermissionDeniedError'
+    assert_not_allowed { User.new.save }
   end
 
   test "setup new user" do
@@ -358,7 +436,7 @@ class UserTest < ActiveSupport::TestCase
 
     vm = VirtualMachine.create
 
-    response = User.setup user, openid_prefix, 'test_repo', vm.uuid
+    response = User.setup user, openid_prefix, 'foo/testrepo', vm.uuid
 
     resp_user = find_obj_in_resp response, 'User'
     verify_user resp_user, email
@@ -379,6 +457,7 @@ class UserTest < ActiveSupport::TestCase
 
     vm_perm = find_obj_in_resp response, 'Link', 'arvados#virtualMachine'
     verify_link vm_perm, 'permission', 'can_login', resp_user[:uuid], vm.uuid
+    assert_equal("foo", vm_perm.properties["username"])
   end
 
   test "setup new user with junk in database" do
@@ -400,7 +479,7 @@ class UserTest < ActiveSupport::TestCase
 
     verify_link resp_link, 'permission', 'can_login', email, bad_uuid
 
-    response = User.setup user, openid_prefix, 'test_repo', vm.uuid
+    response = User.setup user, openid_prefix, 'foo/testrepo', vm.uuid
 
     resp_user = find_obj_in_resp response, 'User'
     verify_user resp_user, email
@@ -421,6 +500,7 @@ class UserTest < ActiveSupport::TestCase
 
     vm_perm = find_obj_in_resp response, 'Link', 'arvados#virtualMachine'
     verify_link vm_perm, 'permission', 'can_login', resp_user[:uuid], vm.uuid
+    assert_equal("foo", vm_perm.properties["username"])
   end
 
   test "setup new user in multiple steps" do
@@ -446,7 +526,7 @@ class UserTest < ActiveSupport::TestCase
     verify_link group_perm, 'permission', 'can_read', resp_user[:uuid], nil
 
     # invoke setup again with repo_name
-    response = User.setup user, openid_prefix, 'test_repo'
+    response = User.setup user, openid_prefix, 'foo/testrepo'
     resp_user = find_obj_in_resp response, 'User', nil
     verify_user resp_user, email
     assert_equal user.uuid, resp_user[:uuid], 'expected uuid not found'
@@ -460,7 +540,7 @@ class UserTest < ActiveSupport::TestCase
     # invoke setup again with a vm_uuid
     vm = VirtualMachine.create
 
-    response = User.setup user, openid_prefix, 'test_repo', vm.uuid
+    response = User.setup user, openid_prefix, 'foo/testrepo', vm.uuid
 
     resp_user = find_obj_in_resp response, 'User', nil
     verify_user resp_user, email
@@ -474,6 +554,7 @@ class UserTest < ActiveSupport::TestCase
 
     vm_perm = find_obj_in_resp response, 'Link', 'arvados#virtualMachine'
     verify_link vm_perm, 'permission', 'can_login', resp_user[:uuid], vm.uuid
+    assert_equal("foo", vm_perm.properties["username"])
   end
 
   def find_obj_in_resp (response_items, object_type, head_kind=nil)
@@ -521,78 +602,42 @@ class UserTest < ActiveSupport::TestCase
     end
   end
 
-  def create_user_and_verify_setup_and_notifications (active, new_user_recipients, inactive_recipients, email, ok_to_auto_setup)
+  def create_user_and_verify_setup_and_notifications (active, new_user_recipients, inactive_recipients, email, expect_username)
     Rails.configuration.new_user_notification_recipients = new_user_recipients
     Rails.configuration.new_inactive_user_notification_recipients = inactive_recipients
 
-    assert_equal new_user_recipients, Rails.configuration.new_user_notification_recipients
-    assert_equal inactive_recipients, Rails.configuration.new_inactive_user_notification_recipients
-
     ActionMailer::Base.deliveries = []
 
+    can_setup = (Rails.configuration.auto_setup_new_users and
+                 (not expect_username.nil?))
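+    # Auto-setup requires both the config switch and a username
+    # derivable from the email address.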
+    expect_repo_name = "#{expect_username}/#{expect_username}"
+    prior_repo = Repository.where(name: expect_repo_name).first
+
     user = User.new
     user.first_name = "first_name_for_newly_created_user"
     user.email = email
     user.is_active = active
     user.save!
+    assert_equal(expect_username, user.username)
 
     # check user setup
-    group = Group.where(name: 'All users').select do |g|
-      g[:uuid].match /-f+$/
-    end.first
-
-    if !Rails.configuration.auto_setup_new_users || !ok_to_auto_setup
-      # verify that the user is not added to "All groups" by auto_setup
-      verify_link_exists false, group[:uuid], user.uuid, 'permission', 'can_read', nil, nil
-
-      # check oid login link not created by auto_setup
-      verify_link_exists false, user.uuid, user.email, 'permission', 'can_login', nil, nil
-    else
-      # verify that auto_setup took place
-      # verify that the user is added to "All groups"
-      verify_link_exists true, group[:uuid], user.uuid, 'permission', 'can_read', nil, nil
-
-      # check oid login link
-      verify_link_exists true, user.uuid, user.email, 'permission', 'can_login', nil, nil
-
-      username = user.email.partition('@')[0] if email
-
-      # check repo
-      repo_names = []
-      if Rails.configuration.auto_setup_new_users_with_repository
-        repos = Repository.where('name like ?', "%#{username}%")
-        assert_not_nil repos, 'repository not found'
-        assert_equal true, repos.any?, 'repository not found'
-        repo_uuids = []
-        repos.each do |repo|
-          repo_uuids << repo[:uuid]
-          repo_names << repo[:name]
-        end
-        if username == 'auto_setup_repo'
-          begin
-            repo_names.delete('auto_setup_repo')
-          ensure
-            assert_equal true, repo_names.any?, 'Repository name for username foo is not unique'
-          end
-        end
-        verify_link_exists true, repo_uuids, user.uuid, 'permission', 'can_manage', nil, nil
-      end
-
-      # if username is existing vm login name, make sure the username used to generate any repo is unique
-      if username == 'auto_setup_vm_login' || username == 'auto_setup_repo'
-        if repo_names.any?
-          assert repo_names.first.start_with? username
-          assert_not_nil /\d$/.match(repo_names.first)
-        end
-      end
-
-      # check vm uuid
-      vm_uuid = Rails.configuration.auto_setup_new_users_with_vm_uuid
-      if vm_uuid
-        verify_link_exists true, vm_uuid, user.uuid, 'permission', 'can_login', 'username', (username == 'auto_setup_repo' ? repo_names.first : username)
-      else
-        verify_link_exists false, vm_uuid, user.uuid, 'permission', 'can_login', 'username', (username == 'auto_setup_repo' ? repo_names.first : username)
-      end
+    verify_link_exists(Rails.configuration.auto_setup_new_users,
+                       groups(:all_users).uuid, user.uuid,
+                       "permission", "can_read")
+    # Check for OID login link.
+    verify_link_exists(Rails.configuration.auto_setup_new_users,
+                       user.uuid, user.email, "permission", "can_login")
+    # Check for repository.
+    if named_repo = (prior_repo or
+                     Repository.where(name: expect_repo_name).first)
+      verify_link_exists((can_setup and prior_repo.nil? and
+                          Rails.configuration.auto_setup_new_users_with_repository),
+                         named_repo.uuid, user.uuid, "permission", "can_manage")
+    end
+    # Check for VM login.
+    if auto_vm_uuid = Rails.configuration.auto_setup_new_users_with_vm_uuid
+      verify_link_exists(can_setup, auto_vm_uuid, user.uuid,
+                         "permission", "can_login", "username", expect_username)
     end
 
     # check email notifications
@@ -601,7 +646,7 @@ class UserTest < ActiveSupport::TestCase
 
     new_user_email_subject = "#{Rails.configuration.email_subject_prefix}New user created notification"
     if Rails.configuration.auto_setup_new_users
-      new_user_email_subject = (ok_to_auto_setup || active) ?
+      new_user_email_subject = (expect_username or active) ?
                                  "#{Rails.configuration.email_subject_prefix}New user created and setup notification" :
                                  "#{Rails.configuration.email_subject_prefix}New user created, but not setup notification"
     end
@@ -641,7 +686,7 @@ class UserTest < ActiveSupport::TestCase
 
   end
 
-  def verify_link_exists link_exists, head_uuid, tail_uuid, link_class, link_name, property_name, property_value
+  def verify_link_exists link_exists, head_uuid, tail_uuid, link_class, link_name, property_name=nil, property_value=nil
     all_links = Link.where(head_uuid: head_uuid,
                            tail_uuid: tail_uuid,
                            link_class: link_class,
diff --git a/services/arv-git-httpd/.gitignore b/services/arv-git-httpd/.gitignore
new file mode 100644 (file)
index 0000000..1ae1045
--- /dev/null
@@ -0,0 +1 @@
+arv-git-httpd
diff --git a/services/arv-git-httpd/auth_handler.go b/services/arv-git-httpd/auth_handler.go
new file mode 100644 (file)
index 0000000..fccb0c9
--- /dev/null
@@ -0,0 +1,153 @@
+package main
+
+import (
+       "log"
+       "net/http"
+       "os"
+       "strings"
+       "time"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+       "git.curoverse.com/arvados.git/sdk/go/auth"
+       "git.curoverse.com/arvados.git/sdk/go/httpserver"
+)
+
+var clientPool = arvadosclient.MakeClientPool()
+
+type authHandler struct {
+       handler http.Handler
+}
+
+func (h *authHandler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
+       var statusCode int
+       var statusText string
+       var apiToken string
+       var repoName string
+       var validApiToken bool
+
+       w := httpserver.WrapResponseWriter(wOrig)
+
+       defer func() {
+               if w.WroteStatus() == 0 {
+                       // Nobody has called WriteHeader yet: that
+                       // must be our job.
+                       w.WriteHeader(statusCode)
+                       w.Write([]byte(statusText))
+               }
+
+               // If the given password is a valid token, log the first 10 characters of the token.
+               // Otherwise: log the string <invalid> if a password is given, else an empty string.
+               passwordToLog := ""
+               if !validApiToken {
+                       if len(apiToken) > 0 {
+                               passwordToLog = "<invalid>"
+                       }
+               } else if len(apiToken) > 10 {
+                       passwordToLog = apiToken[0:10]
+               } else {
+                       passwordToLog = apiToken
+               }
+
+               httpserver.Log(r.RemoteAddr, passwordToLog, w.WroteStatus(), statusText, repoName, r.Method, r.URL.Path)
+       }()
+
+       creds := auth.NewCredentialsFromHTTPRequest(r)
+       if len(creds.Tokens) == 0 {
+               statusCode, statusText = http.StatusUnauthorized, "no credentials provided"
+               w.Header().Add("WWW-Authenticate", "Basic realm=\"git\"")
+               return
+       }
+       apiToken = creds.Tokens[0]
+
+       // Access to paths "/foo/bar.git/*" and "/foo/bar/.git/*" is
+       // protected by the permissions on the repository named
+       // "foo/bar".
+       pathParts := strings.SplitN(r.URL.Path[1:], ".git/", 2)
+       if len(pathParts) != 2 {
+               statusCode, statusText = http.StatusBadRequest, "bad request"
+               return
+       }
+       repoName = pathParts[0]
+       repoName = strings.TrimRight(repoName, "/")
+
+       arv := clientPool.Get()
+       if arv == nil {
+               statusCode, statusText = http.StatusInternalServerError, "connection pool failed: "+clientPool.Err().Error()
+               return
+       }
+       defer clientPool.Put(arv)
+
+       // Ask API server whether the repository is readable using
+       // this token (by trying to read it!)
+       arv.ApiToken = apiToken
+       reposFound := arvadosclient.Dict{}
+       if err := arv.List("repositories", arvadosclient.Dict{
+               "filters": [][]string{{"name", "=", repoName}},
+       }, &reposFound); err != nil {
+               statusCode, statusText = http.StatusInternalServerError, err.Error()
+               return
+       }
+       validApiToken = true
+       if avail, ok := reposFound["items_available"].(float64); !ok {
+               statusCode, statusText = http.StatusInternalServerError, "bad list response from API"
+               return
+       } else if avail < 1 {
+               statusCode, statusText = http.StatusNotFound, "not found"
+               return
+       } else if avail > 1 {
+               statusCode, statusText = http.StatusInternalServerError, "name collision"
+               return
+       }
+
+       repoUUID := reposFound["items"].([]interface{})[0].(map[string]interface{})["uuid"].(string)
+
+       isWrite := strings.HasSuffix(r.URL.Path, "/git-receive-pack")
+       if !isWrite {
+               statusText = "read"
+       } else {
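+               // Test write permission by touching the repository
+               // record: the API server rejects this update unless
+               // the token grants write access.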
+               err := arv.Update("repositories", repoUUID, arvadosclient.Dict{
+                       "repository": arvadosclient.Dict{
+                               "modified_at": time.Now().String(),
+                       },
+               }, &arvadosclient.Dict{})
+               if err != nil {
+                       statusCode, statusText = http.StatusForbidden, err.Error()
+                       return
+               }
+               statusText = "write"
+       }
+
+       // Regardless of whether the client asked for "/foo.git" or
+       // "/foo/.git", we choose whichever variant exists in our repo
+       // root, and we try {uuid}.git and {uuid}/.git first. If none
+       // of these exist, we 404 even though the API told us the repo
+       // _should_ exist (presumably this means the repo was just
+       // created, and gitolite sync hasn't run yet).
+       rewrittenPath := ""
+       tryDirs := []string{
+               "/" + repoUUID + ".git",
+               "/" + repoUUID + "/.git",
+               "/" + repoName + ".git",
+               "/" + repoName + "/.git",
+       }
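+       // For example, a fetch of "/active/foo.git/info/refs" would be
+       // served from "{Root}/{uuid}.git/info/refs" if that directory
+       // exists.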
+       for _, dir := range tryDirs {
+               if fileInfo, err := os.Stat(theConfig.Root + dir); err != nil {
+                       if !os.IsNotExist(err) {
+                               statusCode, statusText = http.StatusInternalServerError, err.Error()
+                               return
+                       }
+               } else if fileInfo.IsDir() {
+                       rewrittenPath = dir + "/" + pathParts[1]
+                       break
+               }
+       }
+       if rewrittenPath == "" {
+               log.Println("WARNING:", repoUUID,
+                       "git directory not found in", theConfig.Root, tryDirs)
+               // We say "content not found" to disambiguate from the
+               // earlier "API says that repo does not exist" error.
+               statusCode, statusText = http.StatusNotFound, "content not found"
+               return
+       }
+       r.URL.Path = rewrittenPath
+
+       h.handler.ServeHTTP(&w, r)
+}
diff --git a/services/arv-git-httpd/doc.go b/services/arv-git-httpd/doc.go
new file mode 100644 (file)
index 0000000..ff4599d
--- /dev/null
@@ -0,0 +1,35 @@
+/*
+arv-git-httpd provides authenticated access to Arvados-hosted git repositories.
+
+See http://doc.arvados.org/install/install-arv-git-httpd.html.
+
+Example:
+
+       arv-git-httpd -address=:8000 -repo-root=/var/lib/arvados/git
+
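+Clients authenticate by sending an Arvados API token as the HTTP
+basic auth password; the username is ignored by the handler. For
+example, with a hypothetical git host and ARVADOS_API_TOKEN set in
+the environment:
+
+       git clone http://none:$ARVADOS_API_TOKEN@git.example.com/active/foo.git
+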
+Options:
+
+       -address [host]:[port]
+
+Listen at the given host and port.
+
+Host can be a domain name, an IP address, or empty (listen on all
+addresses).
+
+Port can be a name, a port number, or 0 (choose an available port).
+
+       -repo-root path
+
+Directory containing git repositories. When a client requests either
+"foo/bar.git" or "foo/bar/.git", git-http-backend will be invoked on
+"path/foo/bar.git" or (if that doesn't exist) "path/foo/bar/.git".
+
+       -git-command path
+
+Location of the CGI program to execute for each authorized request
+(normally this is gitolite-shell if repositories are controlled by
+gitolite, otherwise git). It is invoked with a single argument,
+'http-backend'.  Default is /usr/bin/git.
+
+*/
+package main
diff --git a/services/arv-git-httpd/git_handler.go b/services/arv-git-httpd/git_handler.go
new file mode 100644 (file)
index 0000000..0312b29
--- /dev/null
@@ -0,0 +1,59 @@
+package main
+
+import (
+       "log"
+       "net"
+       "net/http"
+       "net/http/cgi"
+)
+
+// gitHandler is an http.Handler that invokes git-http-backend (or
+// whatever backend is configured) via CGI, with appropriate
+// environment variables in place for git-http-backend or
+// gitolite-shell.
+type gitHandler struct {
+       cgi.Handler
+}
+
+func newGitHandler() http.Handler {
+       return &gitHandler{
+               Handler: cgi.Handler{
+                       Path: theConfig.GitCommand,
+                       Dir:  theConfig.Root,
+                       Env: []string{
+                               "GIT_PROJECT_ROOT=" + theConfig.Root,
+                               "GIT_HTTP_EXPORT_ALL=",
+                               "SERVER_ADDR=" + theConfig.Addr,
+                       },
+                       InheritEnv: []string{
+                               "PATH",
+                               // Needed if GitCommand is gitolite-shell:
+                               "GITOLITE_HTTP_HOME",
+                               "GL_BYPASS_ACCESS_CHECKS",
+                       },
+                       Args: []string{"http-backend"},
+               },
+       }
+}
+
+func (h *gitHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+       remoteHost, remotePort, err := net.SplitHostPort(r.RemoteAddr)
+       if err != nil {
+               log.Printf("Internal error: SplitHostPort(r.RemoteAddr==%q): %s", r.RemoteAddr, err)
+               w.WriteHeader(http.StatusInternalServerError)
+               return
+       }
+
+       // Copy the wrapped cgi.Handler, so these request-specific
+       // variables don't leak into the next request.
+       handlerCopy := h.Handler
+       handlerCopy.Env = append(handlerCopy.Env,
+               // In Go1.5 we can skip this, net/http/cgi will do it for us:
+               "REMOTE_HOST="+remoteHost,
+               "REMOTE_ADDR="+remoteHost,
+               "REMOTE_PORT="+remotePort,
+               // Ideally this would be a real username:
+               "REMOTE_USER="+r.RemoteAddr,
+       )
+       handlerCopy.ServeHTTP(w, r)
+}
diff --git a/services/arv-git-httpd/git_handler_test.go b/services/arv-git-httpd/git_handler_test.go
new file mode 100644 (file)
index 0000000..b3e49c5
--- /dev/null
@@ -0,0 +1,56 @@
+package main
+
+import (
+       "net/http"
+       "net/http/httptest"
+       "net/url"
+       "os"
+       "regexp"
+
+       check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&GitHandlerSuite{})
+
+type GitHandlerSuite struct{}
+
+func (s *GitHandlerSuite) TestEnvVars(c *check.C) {
+       u, err := url.Parse("git.zzzzz.arvadosapi.com/test")
+       c.Check(err, check.Equals, nil)
+       resp := httptest.NewRecorder()
+       req := &http.Request{
+               Method:     "GET",
+               URL:        u,
+               RemoteAddr: "[::1]:12345",
+       }
+       h := newGitHandler()
+       h.(*gitHandler).Path = "/bin/sh"
+       h.(*gitHandler).Args = []string{"-c", "echo HTTP/1.1 200 OK; echo Content-Type: text/plain; echo; env"}
+       os.Setenv("GITOLITE_HTTP_HOME", "/test/ghh")
+       os.Setenv("GL_BYPASS_ACCESS_CHECKS", "yesplease")
+
+       h.ServeHTTP(resp, req)
+
+       c.Check(resp.Code, check.Equals, http.StatusOK)
+       body := resp.Body.String()
+       c.Check(body, check.Matches, `(?ms).*^GITOLITE_HTTP_HOME=/test/ghh$.*`)
+       c.Check(body, check.Matches, `(?ms).*^GL_BYPASS_ACCESS_CHECKS=yesplease$.*`)
+       c.Check(body, check.Matches, `(?ms).*^REMOTE_HOST=::1$.*`)
+       c.Check(body, check.Matches, `(?ms).*^REMOTE_PORT=12345$.*`)
+       c.Check(body, check.Matches, `(?ms).*^SERVER_ADDR=`+regexp.QuoteMeta(theConfig.Addr)+`$.*`)
+}
+
+func (s *GitHandlerSuite) TestCGIError(c *check.C) {
+       u, err := url.Parse("git.zzzzz.arvadosapi.com/test")
+       c.Check(err, check.Equals, nil)
+       resp := httptest.NewRecorder()
+       req := &http.Request{
+               Method:     "GET",
+               URL:        u,
+               RemoteAddr: "bogus",
+       }
+       h := newGitHandler()
+       h.ServeHTTP(resp, req)
+       c.Check(resp.Code, check.Equals, http.StatusInternalServerError)
+       c.Check(resp.Body.String(), check.Equals, "")
+}
diff --git a/services/arv-git-httpd/main.go b/services/arv-git-httpd/main.go
new file mode 100644 (file)
index 0000000..98695c9
--- /dev/null
@@ -0,0 +1,50 @@
+package main
+
+import (
+       "flag"
+       "log"
+       "os"
+)
+
+type config struct {
+       Addr       string
+       GitCommand string
+       Root       string
+}
+
+var theConfig *config
+
+func init() {
+       theConfig = &config{}
+       flag.StringVar(&theConfig.Addr, "address", "0.0.0.0:80",
+               "Address to listen on, \"host:port\".")
+       flag.StringVar(&theConfig.GitCommand, "git-command", "/usr/bin/git",
+               "Path to git or gitolite-shell executable. Each authenticated request will execute this program with a single argument, \"http-backend\".")
+       cwd, err := os.Getwd()
+       if err != nil {
+               log.Fatalln("Getwd():", err)
+       }
+       flag.StringVar(&theConfig.Root, "repo-root", cwd,
+               "Path to git repositories.")
+
+       // MakeArvadosClient returns an error if token is unset (even
+       // though we don't need to do anything requiring
+       // authentication yet). We can't do this in newArvadosClient()
+       // just before calling MakeArvadosClient(), though, because
+       // that interferes with the env var needed by "run test
+       // servers".
+       os.Setenv("ARVADOS_API_TOKEN", "xxx")
+}
+
+func main() {
+       flag.Parse()
+       srv := &server{}
+       if err := srv.Start(); err != nil {
+               log.Fatal(err)
+       }
+       log.Println("Listening at", srv.Addr)
+       log.Println("Repository root", theConfig.Root)
+       if err := srv.Wait(); err != nil {
+               log.Fatal(err)
+       }
+}
diff --git a/services/arv-git-httpd/server.go b/services/arv-git-httpd/server.go
new file mode 100644 (file)
index 0000000..40e77a8
--- /dev/null
@@ -0,0 +1,19 @@
+package main
+
+import (
+       "net/http"
+
+       "git.curoverse.com/arvados.git/sdk/go/httpserver"
+)
+
+type server struct {
+       httpserver.Server
+}
+
+func (srv *server) Start() error {
+       mux := http.NewServeMux()
+       mux.Handle("/", &authHandler{newGitHandler()})
+       srv.Handler = mux
+       srv.Addr = theConfig.Addr
+       return srv.Server.Start()
+}
diff --git a/services/arv-git-httpd/server_test.go b/services/arv-git-httpd/server_test.go
new file mode 100644 (file)
index 0000000..c1364ca
--- /dev/null
@@ -0,0 +1,207 @@
+package main
+
+import (
+       "errors"
+       "io/ioutil"
+       "os"
+       "os/exec"
+       "strings"
+       "testing"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&IntegrationSuite{})
+
+const (
+       spectatorToken = "zw2f4gwx8hw8cjre7yp6v1zylhrhn3m5gvjq73rtpwhmknrybu"
+       activeToken    = "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi"
+       anonymousToken = "4kg6k6lzmp9kj4cpkcoxie964cmvjahbt4fod9zru44k4jqdmi"
+       expiredToken   = "2ym314ysp27sk7h943q6vtc378srb06se3pq6ghurylyf3pdmx"
+)
+
+// IntegrationSuite tests need an API server and an arv-git-httpd server
+type IntegrationSuite struct {
+       tmpRepoRoot string
+       tmpWorkdir  string
+       testServer  *server
+}
+
+func (s *IntegrationSuite) TestPathVariants(c *check.C) {
+       s.makeArvadosRepo(c)
+       for _, repo := range []string{"active/foo.git", "active/foo/.git", "arvados.git", "arvados/.git"} {
+               err := s.runGit(c, spectatorToken, "fetch", repo)
+               c.Assert(err, check.Equals, nil)
+       }
+}
+
+func (s *IntegrationSuite) TestReadonly(c *check.C) {
+       err := s.runGit(c, spectatorToken, "fetch", "active/foo.git")
+       c.Assert(err, check.Equals, nil)
+       err = s.runGit(c, spectatorToken, "push", "active/foo.git", "master:newbranchfail")
+       c.Assert(err, check.ErrorMatches, `.*HTTP code = 403.*`)
+       _, err = os.Stat(s.tmpRepoRoot + "/zzzzz-s0uqq-382brsig8rp3666/.git/refs/heads/newbranchfail")
+       c.Assert(err, check.FitsTypeOf, &os.PathError{})
+}
+
+func (s *IntegrationSuite) TestReadwrite(c *check.C) {
+       err := s.runGit(c, activeToken, "fetch", "active/foo.git")
+       c.Assert(err, check.Equals, nil)
+       err = s.runGit(c, activeToken, "push", "active/foo.git", "master:newbranch")
+       c.Assert(err, check.Equals, nil)
+       _, err = os.Stat(s.tmpRepoRoot + "/zzzzz-s0uqq-382brsig8rp3666/.git/refs/heads/newbranch")
+       c.Assert(err, check.Equals, nil)
+}
+
+func (s *IntegrationSuite) TestNonexistent(c *check.C) {
+       err := s.runGit(c, spectatorToken, "fetch", "thisrepodoesnotexist.git")
+       c.Assert(err, check.ErrorMatches, `.* not found.*`)
+}
+
+func (s *IntegrationSuite) TestMissingGitdirReadableRepository(c *check.C) {
+       err := s.runGit(c, activeToken, "fetch", "active/foo2.git")
+       c.Assert(err, check.ErrorMatches, `.* not found.*`)
+}
+
+func (s *IntegrationSuite) TestNoPermission(c *check.C) {
+       for _, repo := range []string{"active/foo.git", "active/foo/.git"} {
+               err := s.runGit(c, anonymousToken, "fetch", repo)
+               c.Assert(err, check.ErrorMatches, `.* not found.*`)
+       }
+}
+
+func (s *IntegrationSuite) TestExpiredToken(c *check.C) {
+       for _, repo := range []string{"active/foo.git", "active/foo/.git"} {
+               err := s.runGit(c, expiredToken, "fetch", repo)
+               c.Assert(err, check.ErrorMatches, `.* 500 while accessing.*`)
+       }
+}
+
+func (s *IntegrationSuite) TestInvalidToken(c *check.C) {
+       for _, repo := range []string{"active/foo.git", "active/foo/.git"} {
+               err := s.runGit(c, "s3cr3tp@ssw0rd", "fetch", repo)
+               c.Assert(err, check.ErrorMatches, `.* requested URL returned error.*`)
+       }
+}
+
+func (s *IntegrationSuite) TestShortToken(c *check.C) {
+       for _, repo := range []string{"active/foo.git", "active/foo/.git"} {
+               err := s.runGit(c, "s3cr3t", "fetch", repo)
+               c.Assert(err, check.ErrorMatches, `.* 500 while accessing.*`)
+       }
+}
+
+func (s *IntegrationSuite) TestShortTokenBadReq(c *check.C) {
+       for _, repo := range []string{"bogus"} {
+               err := s.runGit(c, "s3cr3t", "fetch", repo)
+               c.Assert(err, check.ErrorMatches, `.* requested URL returned error.*`)
+       }
+}
+
+func (s *IntegrationSuite) SetUpSuite(c *check.C) {
+       arvadostest.StartAPI()
+}
+
+func (s *IntegrationSuite) TearDownSuite(c *check.C) {
+       arvadostest.StopAPI()
+}
+
+func (s *IntegrationSuite) SetUpTest(c *check.C) {
+       arvadostest.ResetEnv()
+       s.testServer = &server{}
+       var err error
+       s.tmpRepoRoot, err = ioutil.TempDir("", "arv-git-httpd")
+       c.Assert(err, check.Equals, nil)
+       s.tmpWorkdir, err = ioutil.TempDir("", "arv-git-httpd")
+       c.Assert(err, check.Equals, nil)
+       _, err = exec.Command("git", "init", s.tmpRepoRoot+"/zzzzz-s0uqq-382brsig8rp3666").Output()
+       c.Assert(err, check.Equals, nil)
+       _, err = exec.Command("sh", "-c", "cd "+s.tmpRepoRoot+"/zzzzz-s0uqq-382brsig8rp3666 && echo test >test && git add test && git -c user.name=Foo -c user.email=Foo commit -am 'foo: test'").CombinedOutput()
+       c.Assert(err, check.Equals, nil)
+       _, err = exec.Command("git", "init", s.tmpWorkdir).Output()
+       c.Assert(err, check.Equals, nil)
+       _, err = exec.Command("sh", "-c", "cd "+s.tmpWorkdir+" && echo work >work && git add work && git -c user.name=Foo -c user.email=Foo commit -am 'workdir: test'").CombinedOutput()
+       c.Assert(err, check.Equals, nil)
+
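+       // Configure a credential helper in the work dir that answers
+       // "get" requests with $ARVADOS_API_TOKEN as the password, so
+       // runGit can authenticate without prompting.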
+       _, err = exec.Command("git", "config",
+               "--file", s.tmpWorkdir+"/.git/config",
+               "credential.http://"+s.testServer.Addr+"/.helper",
+               "!cred(){ cat >/dev/null; if [ \"$1\" = get ]; then echo password=$ARVADOS_API_TOKEN; fi; };cred").Output()
+       c.Assert(err, check.Equals, nil)
+       _, err = exec.Command("git", "config",
+               "--file", s.tmpWorkdir+"/.git/config",
+               "credential.http://"+s.testServer.Addr+"/.username",
+               "none").Output()
+       c.Assert(err, check.Equals, nil)
+
+       theConfig = &config{
+               Addr:       ":0",
+               GitCommand: "/usr/bin/git",
+               Root:       s.tmpRepoRoot,
+       }
+       err = s.testServer.Start()
+       c.Assert(err, check.Equals, nil)
+
+       // Clear ARVADOS_API_TOKEN after starting up the server, to
+       // make sure arv-git-httpd doesn't use it.
+       os.Setenv("ARVADOS_API_TOKEN", "unused-token-placates-client-library")
+}
+
+func (s *IntegrationSuite) TearDownTest(c *check.C) {
+       var err error
+       if s.testServer != nil {
+               err = s.testServer.Close()
+       }
+       c.Check(err, check.Equals, nil)
+       if s.tmpRepoRoot != "" {
+               err = os.RemoveAll(s.tmpRepoRoot)
+               c.Check(err, check.Equals, nil)
+       }
+       if s.tmpWorkdir != "" {
+               err = os.RemoveAll(s.tmpWorkdir)
+               c.Check(err, check.Equals, nil)
+       }
+}
+
+func (s *IntegrationSuite) runGit(c *check.C, token, gitCmd, repo string, args ...string) error {
+       cwd, err := os.Getwd()
+       c.Assert(err, check.Equals, nil)
+       defer os.Chdir(cwd)
+       os.Chdir(s.tmpWorkdir)
+
+       gitargs := append([]string{
+               gitCmd, "http://" + s.testServer.Addr + "/" + repo,
+       }, args...)
+       cmd := exec.Command("git", gitargs...)
+       cmd.Env = append(os.Environ(), "ARVADOS_API_TOKEN="+token)
+       w, err := cmd.StdinPipe()
+       c.Assert(err, check.Equals, nil)
+       w.Close()
+       output, err := cmd.CombinedOutput()
+       c.Log("git ", gitargs, " => ", err)
+       c.Log(string(output))
+       if err != nil && len(output) > 0 {
+               // If messages appeared on stderr, they are more
+               // helpful than the err returned by CombinedOutput().
+               //
+               // Easier to match error strings without newlines:
+               err = errors.New(strings.Replace(string(output), "\n", " // ", -1))
+       }
+       return err
+}
+
+// Make a bare repo at {tmpRepoRoot}/zzzzz-s0uqq-arvadosrepo0123.git,
+// the fixture UUID for the repository named "arvados".
+func (s *IntegrationSuite) makeArvadosRepo(c *check.C) {
+       msg, err := exec.Command("git", "init", "--bare", s.tmpRepoRoot+"/zzzzz-s0uqq-arvadosrepo0123.git").CombinedOutput()
+       c.Log(string(msg))
+       c.Assert(err, check.Equals, nil)
+       msg, err = exec.Command("git", "--git-dir", s.tmpRepoRoot+"/zzzzz-s0uqq-arvadosrepo0123.git", "fetch", "../../.git", "HEAD:master").CombinedOutput()
+       c.Log(string(msg))
+       c.Assert(err, check.Equals, nil)
+}
+
+// Gocheck boilerplate
+func Test(t *testing.T) {
+       check.TestingT(t)
+}
diff --git a/services/crunchstat/.gitignore b/services/crunchstat/.gitignore
new file mode 100644 (file)
index 0000000..c26270a
--- /dev/null
@@ -0,0 +1 @@
+crunchstat
index 1a22e264f1ab734dafe052a589f86bfc2be6597e..e14912423db73483ef2623149e23d3ca63b3dabb 100644 (file)
@@ -86,11 +86,19 @@ var reportedStatFile = map[string]string{}
 // cgroup root for the given statgroup. (This will avoid falling back
 // to host-level stats during container setup and teardown.)
 func OpenStatFile(cgroup Cgroup, statgroup string, stat string) (*os.File, error) {
-       var paths = []string{
-               fmt.Sprintf("%s/%s/%s/%s/%s", cgroup.root, statgroup, cgroup.parent, cgroup.cid, stat),
-               fmt.Sprintf("%s/%s/%s/%s", cgroup.root, cgroup.parent, cgroup.cid, stat),
-               fmt.Sprintf("%s/%s/%s", cgroup.root, statgroup, stat),
-               fmt.Sprintf("%s/%s", cgroup.root, stat),
+       var paths []string
+       if cgroup.cid != "" {
+               // Collect container's stats
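+               // e.g. /sys/fs/cgroup/memory/docker/abc123/memory.stat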
+               paths = []string{
+                       fmt.Sprintf("%s/%s/%s/%s/%s", cgroup.root, statgroup, cgroup.parent, cgroup.cid, stat),
+                       fmt.Sprintf("%s/%s/%s/%s", cgroup.root, cgroup.parent, cgroup.cid, stat),
+               }
+       } else {
+               // Collect this host's stats
+               paths = []string{
+                       fmt.Sprintf("%s/%s/%s", cgroup.root, statgroup, stat),
+                       fmt.Sprintf("%s/%s", cgroup.root, stat),
+               }
        }
        var path string
        var file *os.File
@@ -110,12 +118,14 @@ func OpenStatFile(cgroup Cgroup, statgroup string, stat string) (*os.File, error
                // whether we happen to collect stats [a] before any
                // processes have been created in the container and
                // [b] after all contained processes have exited.
-               reportedStatFile[stat] = path
                if path == "" {
-                       statLog.Printf("error finding stats file: stat %s, statgroup %s, cid %s, parent %s, root %s\n", stat, statgroup, cgroup.cid, cgroup.parent, cgroup.root)
+                       statLog.Printf("notice: stats not available: stat %s, statgroup %s, cid %s, parent %s, root %s\n", stat, statgroup, cgroup.cid, cgroup.parent, cgroup.root)
+               } else if ok {
+                       statLog.Printf("notice: stats moved from %s to %s\n", reportedStatFile[stat], path)
                } else {
-                       statLog.Printf("error reading stats from %s\n", path)
+                       statLog.Printf("notice: reading stats from %s\n", path)
                }
+               reportedStatFile[stat] = path
        }
        return file, err
 }
index 9a7a838f1b9db71eea07bf88cd98316f5e132fd3..5519ad8670ec93611740268d34f845e5c104fffe 100644 (file)
@@ -1,4 +1,4 @@
-/* Deals with parsing Collection responses from API Server. */
+// Deals with parsing Collection responses from API Server.
 
 package collection
 
@@ -13,13 +13,12 @@ import (
        "git.curoverse.com/arvados.git/services/datamanager/loggerutil"
        "log"
        "os"
-       "runtime"
        "runtime/pprof"
        "time"
 )
 
 var (
-       heap_profile_filename string
+       heapProfileFilename string
        // globals for debugging
        totalManifestSize uint64
        maxManifestSize   uint64
@@ -34,9 +33,13 @@ type Collection struct {
 }
 
 type ReadCollections struct {
-       ReadAllCollections    bool
-       UuidToCollection      map[string]Collection
-       OwnerToCollectionSize map[string]int
+       ReadAllCollections        bool
+       UuidToCollection          map[string]Collection
+       OwnerToCollectionSize     map[string]int
+       BlockToDesiredReplication map[blockdigest.DigestWithSize]int
+       CollectionUuidToIndex     map[string]int
+       CollectionIndexToUuid     []string
+       BlockToCollectionIndices  map[blockdigest.DigestWithSize][]int
 }
 
 type GetCollectionsParams struct {
@@ -59,7 +62,7 @@ type SdkCollectionList struct {
 }
 
 func init() {
-       flag.StringVar(&heap_profile_filename,
+       flag.StringVar(&heapProfileFilename,
                "heap-profile",
                "",
                "File to write the heap profiles to. Leave blank to skip profiling.")
@@ -72,9 +75,9 @@ func init() {
 // Otherwise we would see cumulative numbers as explained here:
 // https://groups.google.com/d/msg/golang-nuts/ZyHciRglQYc/2nh4Ndu2fZcJ
 func WriteHeapProfile() {
-       if heap_profile_filename != "" {
+       if heapProfileFilename != "" {
 
-               heap_profile, err := os.Create(heap_profile_filename)
+               heap_profile, err := os.Create(heapProfileFilename)
                if err != nil {
                        log.Fatal(err)
                }
@@ -90,17 +93,7 @@ func WriteHeapProfile() {
 
 func GetCollectionsAndSummarize(params GetCollectionsParams) (results ReadCollections) {
        results = GetCollections(params)
-       ComputeSizeOfOwnedCollections(&results)
-
-       if params.Logger != nil {
-               params.Logger.Update(func(p map[string]interface{}, e map[string]interface{}) {
-                       collectionInfo := p["collection_info"].(map[string]interface{})
-                       // Since maps are shallow copied, we run a risk of concurrent
-                       // updates here. By copying results.OwnerToCollectionSize into
-                       // the log, we're assuming that it won't be updated.
-                       collectionInfo["owner_to_collection_size"] = results.OwnerToCollectionSize
-               })
-       }
+       results.Summarize(params.Logger)
 
        log.Printf("Uuid to Size used: %v", results.OwnerToCollectionSize)
        log.Printf("Read and processed %d collections",
@@ -125,7 +118,6 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections) {
        fieldsWanted := []string{"manifest_text",
                "owner_uuid",
                "uuid",
-               // TODO(misha): Start using the redundancy field.
                "redundancy",
                "modified_at"}
 
@@ -138,6 +130,23 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections) {
                sdkParams["limit"] = params.BatchSize
        }
 
+       var defaultReplicationLevel int
+       {
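+               // Collections that report redundancy 0 (unset) inherit
+               // this cluster-wide default.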
+               value, err := params.Client.Discovery("defaultCollectionReplication")
+               if err != nil {
+                       loggerutil.FatalWithMessage(params.Logger,
+                               fmt.Sprintf("Error querying default collection replication: %v", err))
+               }
+
+               defaultReplicationLevel = int(value.(float64))
+               if defaultReplicationLevel <= 0 {
+                       loggerutil.FatalWithMessage(params.Logger,
+                               fmt.Sprintf("Default collection replication returned by arvados SDK "+
+                                       "should be a positive integer but instead it was %d.",
+                                       defaultReplicationLevel))
+               }
+       }
+
        initialNumberOfCollectionsAvailable, err :=
                util.NumberItemsAvailable(params.Client, "collections")
        if err != nil {
@@ -152,10 +161,10 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections) {
 
        if params.Logger != nil {
                params.Logger.Update(func(p map[string]interface{}, e map[string]interface{}) {
-                       collectionInfo := make(map[string]interface{})
+                       collectionInfo := logger.GetOrCreateMap(p, "collection_info")
                        collectionInfo["num_collections_at_start"] = initialNumberOfCollectionsAvailable
                        collectionInfo["batch_size"] = params.BatchSize
-                       p["collection_info"] = collectionInfo
+                       collectionInfo["default_replication_level"] = defaultReplicationLevel
                })
        }
 
@@ -181,6 +190,7 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections) {
                sdkParams["filters"].([][]string)[0][2] =
                        ProcessCollections(params.Logger,
                                collections.Items,
+                               defaultReplicationLevel,
                                results.UuidToCollection).Format(time.RFC3339)
 
                // update counts
@@ -197,7 +207,7 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections) {
 
                if params.Logger != nil {
                        params.Logger.Update(func(p map[string]interface{}, e map[string]interface{}) {
-                               collectionInfo := p["collection_info"].(map[string]interface{})
+                               collectionInfo := logger.GetOrCreateMap(p, "collection_info")
                                collectionInfo["collections_read"] = totalCollections
                                collectionInfo["latest_modified_date_seen"] = sdkParams["filters"].([][]string)[0][2]
                                collectionInfo["total_manifest_size"] = totalManifestSize
@@ -206,9 +216,6 @@ func GetCollections(params GetCollectionsParams) (results ReadCollections) {
                }
        }
 
-       // Just in case this lowers the numbers reported in the heap profile.
-       runtime.GC()
-
        // Write the heap profile for examining memory usage
        WriteHeapProfile()
 
@@ -224,6 +231,7 @@ func StrCopy(s string) string {
 
 func ProcessCollections(arvLogger *logger.Logger,
        receivedCollections []SdkCollectionInfo,
+       defaultReplicationLevel int,
        uuidToCollection map[string]Collection) (latestModificationDate time.Time) {
        for _, sdkCollection := range receivedCollections {
                collection := Collection{Uuid: StrCopy(sdkCollection.Uuid),
@@ -235,15 +243,20 @@ func ProcessCollections(arvLogger *logger.Logger,
                        loggerutil.FatalWithMessage(arvLogger,
                                fmt.Sprintf(
                                        "Arvados SDK collection returned with unexpected zero "+
-                                               "modifcation date. This probably means that either we failed to "+
+                                               "modification date. This probably means that either we failed to "+
                                                "parse the modification date or the API server has changed how "+
-                                               "it returns modification dates: %v",
+                                               "it returns modification dates: %+v",
                                        collection))
                }
 
                if sdkCollection.ModifiedAt.After(latestModificationDate) {
                        latestModificationDate = sdkCollection.ModifiedAt
                }
+
+               if collection.ReplicationLevel == 0 {
+                       collection.ReplicationLevel = defaultReplicationLevel
+               }
+
                manifest := manifest.Manifest{sdkCollection.ManifestText}
                manifestSize := uint64(len(sdkCollection.ManifestText))
 
@@ -282,11 +295,47 @@ func ProcessCollections(arvLogger *logger.Logger,
        return
 }
 
-func ComputeSizeOfOwnedCollections(readCollections *ReadCollections) {
+func (readCollections *ReadCollections) Summarize(arvLogger *logger.Logger) {
        readCollections.OwnerToCollectionSize = make(map[string]int)
+       readCollections.BlockToDesiredReplication = make(map[blockdigest.DigestWithSize]int)
+       numCollections := len(readCollections.UuidToCollection)
+       readCollections.CollectionUuidToIndex = make(map[string]int, numCollections)
+       readCollections.CollectionIndexToUuid = make([]string, 0, numCollections)
+       readCollections.BlockToCollectionIndices = make(map[blockdigest.DigestWithSize][]int)
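+       // Each collection gets a dense index so per-block lists can
+       // store small integers instead of repeated UUID strings.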
+
        for _, coll := range readCollections.UuidToCollection {
+               collectionIndex := len(readCollections.CollectionIndexToUuid)
+               readCollections.CollectionIndexToUuid =
+                       append(readCollections.CollectionIndexToUuid, coll.Uuid)
+               readCollections.CollectionUuidToIndex[coll.Uuid] = collectionIndex
+
                readCollections.OwnerToCollectionSize[coll.OwnerUuid] =
                        readCollections.OwnerToCollectionSize[coll.OwnerUuid] + coll.TotalSize
+
+               for block, size := range coll.BlockDigestToSize {
+                       locator := blockdigest.DigestWithSize{Digest: block, Size: uint32(size)}
+                       readCollections.BlockToCollectionIndices[locator] =
+                               append(readCollections.BlockToCollectionIndices[locator],
+                                       collectionIndex)
+                       storedReplication := readCollections.BlockToDesiredReplication[locator]
+                       if coll.ReplicationLevel > storedReplication {
+                               readCollections.BlockToDesiredReplication[locator] =
+                                       coll.ReplicationLevel
+                       }
+               }
+       }
+
+       if arvLogger != nil {
+               arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
+                       collectionInfo := logger.GetOrCreateMap(p, "collection_info")
+                       // Since maps are shallow copied, we run a risk of concurrent
+                       // updates here. By copying results.OwnerToCollectionSize into
+                       // the log, we're assuming that it won't be updated.
+                       collectionInfo["owner_to_collection_size"] =
+                               readCollections.OwnerToCollectionSize
+                       collectionInfo["distinct_blocks_named"] =
+                               len(readCollections.BlockToDesiredReplication)
+               })
        }
 
        return
diff --git a/services/datamanager/collection/collection_test.go b/services/datamanager/collection/collection_test.go
new file mode 100644 (file)
index 0000000..1669bb7
--- /dev/null
@@ -0,0 +1,123 @@
+package collection
+
+import (
+       "git.curoverse.com/arvados.git/sdk/go/blockdigest"
+       . "gopkg.in/check.v1"
+       "testing"
+)
+
+// Gocheck boilerplate
+func Test(t *testing.T) {
+       TestingT(t)
+}
+
+type MySuite struct{}
+
+var _ = Suite(&MySuite{})
+
+// This captures the result we expect from
+// ReadCollections.Summarize().  Because CollectionUuidToIndex is
+// indeterminate, we replace BlockToCollectionIndices with
+// BlockToCollectionUuids.
+type ExpectedSummary struct {
+       OwnerToCollectionSize     map[string]int
+       BlockToDesiredReplication map[blockdigest.DigestWithSize]int
+       BlockToCollectionUuids    map[blockdigest.DigestWithSize][]string
+}
+
+func CompareSummarizedReadCollections(c *C,
+       summarized ReadCollections,
+       expected ExpectedSummary) {
+
+       c.Assert(summarized.OwnerToCollectionSize, DeepEquals,
+               expected.OwnerToCollectionSize)
+
+       c.Assert(summarized.BlockToDesiredReplication, DeepEquals,
+               expected.BlockToDesiredReplication)
+
+       summarizedBlockToCollectionUuids :=
+               make(map[blockdigest.DigestWithSize]map[string]struct{})
+       for digest, indices := range summarized.BlockToCollectionIndices {
+               uuidSet := make(map[string]struct{})
+               summarizedBlockToCollectionUuids[digest] = uuidSet
+               for _, index := range indices {
+                       uuidSet[summarized.CollectionIndexToUuid[index]] = struct{}{}
+               }
+       }
+
+       expectedBlockToCollectionUuids :=
+               make(map[blockdigest.DigestWithSize]map[string]struct{})
+       for digest, uuidSlice := range expected.BlockToCollectionUuids {
+               uuidSet := make(map[string]struct{})
+               expectedBlockToCollectionUuids[digest] = uuidSet
+               for _, uuid := range uuidSlice {
+                       uuidSet[uuid] = struct{}{}
+               }
+       }
+
+       c.Assert(summarizedBlockToCollectionUuids, DeepEquals,
+               expectedBlockToCollectionUuids)
+}
+
+func (s *MySuite) TestSummarizeSimple(checker *C) {
+       rc := MakeTestReadCollections([]TestCollectionSpec{TestCollectionSpec{
+               ReplicationLevel: 5,
+               Blocks:           []int{1, 2},
+       }})
+
+       rc.Summarize(nil)
+
+       c := rc.UuidToCollection["col0"]
+
+       blockDigest1 := blockdigest.MakeTestDigestWithSize(1)
+       blockDigest2 := blockdigest.MakeTestDigestWithSize(2)
+
+       expected := ExpectedSummary{
+               OwnerToCollectionSize:     map[string]int{c.OwnerUuid: c.TotalSize},
+               BlockToDesiredReplication: map[blockdigest.DigestWithSize]int{blockDigest1: 5, blockDigest2: 5},
+               BlockToCollectionUuids:    map[blockdigest.DigestWithSize][]string{blockDigest1: []string{c.Uuid}, blockDigest2: []string{c.Uuid}},
+       }
+
+       CompareSummarizedReadCollections(checker, rc, expected)
+}
+
+func (s *MySuite) TestSummarizeOverlapping(checker *C) {
+       rc := MakeTestReadCollections([]TestCollectionSpec{
+               TestCollectionSpec{
+                       ReplicationLevel: 5,
+                       Blocks:           []int{1, 2},
+               },
+               TestCollectionSpec{
+                       ReplicationLevel: 8,
+                       Blocks:           []int{2, 3},
+               },
+       })
+
+       rc.Summarize(nil)
+
+       c0 := rc.UuidToCollection["col0"]
+       c1 := rc.UuidToCollection["col1"]
+
+       blockDigest1 := blockdigest.MakeTestDigestWithSize(1)
+       blockDigest2 := blockdigest.MakeTestDigestWithSize(2)
+       blockDigest3 := blockdigest.MakeTestDigestWithSize(3)
+
+       expected := ExpectedSummary{
+               OwnerToCollectionSize: map[string]int{
+                       c0.OwnerUuid: c0.TotalSize,
+                       c1.OwnerUuid: c1.TotalSize,
+               },
+               BlockToDesiredReplication: map[blockdigest.DigestWithSize]int{
+                       blockDigest1: 5,
+                       blockDigest2: 8,
+                       blockDigest3: 8,
+               },
+               BlockToCollectionUuids: map[blockdigest.DigestWithSize][]string{
+                       blockDigest1: []string{c0.Uuid},
+                       blockDigest2: []string{c0.Uuid, c1.Uuid},
+                       blockDigest3: []string{c1.Uuid},
+               },
+       }
+
+       CompareSummarizedReadCollections(checker, rc, expected)
+}
diff --git a/services/datamanager/collection/testing.go b/services/datamanager/collection/testing.go
new file mode 100644 (file)
index 0000000..f3c1f47
--- /dev/null
@@ -0,0 +1,60 @@
+// Code used for testing only.
+
+package collection
+
+import (
+       "fmt"
+       "git.curoverse.com/arvados.git/sdk/go/blockdigest"
+)
+
+type TestCollectionSpec struct {
+       // The desired replication level
+       ReplicationLevel int
+       // Blocks this collection contains, represented by ints. An int
+       // that appears more than once still represents a single block.
+       Blocks []int
+}
+
+// Creates a ReadCollections object for testing based on the given
+// specs.  Only the ReadAllCollections and UuidToCollection fields are
+// populated.  To populate other fields call rc.Summarize().
+func MakeTestReadCollections(specs []TestCollectionSpec) (rc ReadCollections) {
+       rc = ReadCollections{
+               ReadAllCollections: true,
+               UuidToCollection:   map[string]Collection{},
+       }
+
+       for i, spec := range specs {
+               c := Collection{
+                       Uuid:              fmt.Sprintf("col%d", i),
+                       OwnerUuid:         fmt.Sprintf("owner%d", i),
+                       ReplicationLevel:  spec.ReplicationLevel,
+                       BlockDigestToSize: map[blockdigest.BlockDigest]int{},
+               }
+               for _, j := range spec.Blocks {
+                       c.BlockDigestToSize[blockdigest.MakeTestBlockDigest(j)] = j
+               }
+               // We compute the size in a separate loop because the value
+               // computed in the above loop would be invalid if c.Blocks
+               // contained duplicates.
+               for _, size := range c.BlockDigestToSize {
+                       c.TotalSize += size
+               }
+               // Store the struct only after TotalSize is computed:
+               // map values are copied on assignment, so storing it
+               // earlier would leave TotalSize frozen at zero.
+               rc.UuidToCollection[c.Uuid] = c
+       }
+       return
+}
+
+// Returns a slice giving the collection index of each collection that
+// was passed in to MakeTestReadCollections. rc.Summarize() must be
+// called before this method, since Summarize() assigns an index to
+// each collection.
+func (rc ReadCollections) CollectionIndicesForTesting() (indices []int) {
+       // TODO(misha): Assert that rc.Summarize() has been called.
+       numCollections := len(rc.CollectionIndexToUuid)
+       indices = make([]int, numCollections)
+       for i := 0; i < numCollections; i++ {
+               indices[i] = rc.CollectionUuidToIndex[fmt.Sprintf("col%d", i)]
+       }
+       return
+}
index a8e506eacb1d0f5c533d005a7a3b321b9c6b76d3..70a9ae785956396bab936e73b1a7f6ed04c63731 100644 (file)
@@ -4,12 +4,15 @@ package main
 
 import (
        "flag"
+       "fmt"
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
        "git.curoverse.com/arvados.git/sdk/go/logger"
        "git.curoverse.com/arvados.git/sdk/go/util"
        "git.curoverse.com/arvados.git/services/datamanager/collection"
        "git.curoverse.com/arvados.git/services/datamanager/keep"
        "git.curoverse.com/arvados.git/services/datamanager/loggerutil"
+       "git.curoverse.com/arvados.git/services/datamanager/summary"
        "log"
        "time"
 )
@@ -38,19 +41,25 @@ func init() {
 func main() {
        flag.Parse()
        if minutesBetweenRuns == 0 {
-               singlerun()
+               err := singlerun()
+               if err != nil {
+                       log.Fatalf("Got an error: %v", err)
+               }
        } else {
                waitTime := time.Minute * time.Duration(minutesBetweenRuns)
                for {
                        log.Println("Beginning Run")
-                       singlerun()
+                       err := singlerun()
+                       if err != nil {
+                               log.Printf("Got an error: %v", err)
+                       }
                        log.Printf("Sleeping for %d minutes", minutesBetweenRuns)
                        time.Sleep(waitTime)
                }
        }
 }
 
-func singlerun() {
+func singlerun() error {
        arv, err := arvadosclient.MakeArvadosClient()
        if err != nil {
                log.Fatalf("Error setting up arvados client %s", err.Error())
@@ -64,7 +73,8 @@ func singlerun() {
 
        var arvLogger *logger.Logger
        if logEventTypePrefix != "" {
-               arvLogger = logger.NewLogger(logger.LoggerParams{Client: arv,
+               arvLogger = logger.NewLogger(logger.LoggerParams{
+                       Client:          arv,
                        EventTypePrefix: logEventTypePrefix,
                        WriteInterval:   time.Second * time.Duration(logFrequencySeconds)})
        }
@@ -74,28 +84,103 @@ func singlerun() {
                arvLogger.AddWriteHook(loggerutil.LogMemoryAlloc)
        }
 
-       collectionChannel := make(chan collection.ReadCollections)
+       var (
+               dataFetcher     summary.DataFetcher
+               readCollections collection.ReadCollections
+               keepServerInfo  keep.ReadServers
+       )
+
+       if summary.ShouldReadData() {
+               dataFetcher = summary.ReadData
+       } else {
+               dataFetcher = BuildDataFetcher(arv)
+       }
 
-       go func() {
-               collectionChannel <- collection.GetCollectionsAndSummarize(
-                       collection.GetCollectionsParams{
-                               Client: arv, Logger: arvLogger, BatchSize: 50})
-       }()
+       dataFetcher(arvLogger, &readCollections, &keepServerInfo)
 
-       keepServerInfo := keep.GetKeepServersAndSummarize(
-               keep.GetKeepServersParams{Client: arv, Logger: arvLogger, Limit: 1000})
+       summary.MaybeWriteData(arvLogger, readCollections, keepServerInfo)
 
-       readCollections := <-collectionChannel
+       buckets := summary.BucketReplication(readCollections, keepServerInfo)
+       bucketCounts := buckets.Counts()
 
-       // TODO(misha): Use these together to verify replication.
-       _ = readCollections
-       _ = keepServerInfo
+       replicationSummary := buckets.SummarizeBuckets(readCollections)
+       replicationCounts := replicationSummary.ComputeCounts()
+
+       log.Printf("Blocks In Collections: %d, "+
+               "\nBlocks In Keep: %d.",
+               len(readCollections.BlockToDesiredReplication),
+               len(keepServerInfo.BlockToServers))
+       log.Println(replicationCounts.PrettyPrint())
+
+       log.Printf("Blocks Histogram:")
+       for _, rlbss := range bucketCounts {
+               log.Printf("%+v: %10d",
+                       rlbss.Levels,
+                       rlbss.Count)
+       }
+
+       kc, err := keepclient.MakeKeepClient(&arv)
+       if err != nil {
+               loggerutil.FatalWithMessage(arvLogger,
+                       fmt.Sprintf("Error setting up keep client %s", err.Error()))
+       }
 
        // Log that we're finished. We force the recording, since go will
-       // not wait for the timer before exiting.
+       // not wait for the write timer before exiting.
        if arvLogger != nil {
-               arvLogger.FinalUpdate(func(p map[string]interface{}, e map[string]interface{}) {
+               defer arvLogger.FinalUpdate(func(p map[string]interface{}, e map[string]interface{}) {
+                       summaryInfo := logger.GetOrCreateMap(p, "summary_info")
+                       summaryInfo["block_replication_counts"] = bucketCounts
+                       summaryInfo["replication_summary"] = replicationCounts
+                       p["summary_info"] = summaryInfo
+
                        p["run_info"].(map[string]interface{})["finished_at"] = time.Now()
                })
        }
+
+       pullServers := summary.ComputePullServers(kc,
+               &keepServerInfo,
+               readCollections.BlockToDesiredReplication,
+               replicationSummary.UnderReplicatedBlocks)
+
+       pullLists := summary.BuildPullLists(pullServers)
+
+       trashLists, trashErr := summary.BuildTrashLists(kc,
+               &keepServerInfo,
+               replicationSummary.KeepBlocksNotInCollections)
+
+       summary.WritePullLists(arvLogger, pullLists)
+
+       if trashErr != nil {
+               return trashErr
+       } else {
+               keep.SendTrashLists(keep.GetDataManagerToken(arvLogger), kc, trashLists)
+       }
+
+       return nil
+}
+
+// BuildDataFetcher returns a DataFetcher that fetches data from remote servers.
+func BuildDataFetcher(arv arvadosclient.ArvadosClient) summary.DataFetcher {
+       return func(arvLogger *logger.Logger,
+               readCollections *collection.ReadCollections,
+               keepServerInfo *keep.ReadServers) {
+               collectionChannel := make(chan collection.ReadCollections)
+
+               go func() {
+                       collectionChannel <- collection.GetCollectionsAndSummarize(
+                               collection.GetCollectionsParams{
+                                       Client:    arv,
+                                       Logger:    arvLogger,
+                                       BatchSize: 50})
+               }()
+
+               *keepServerInfo = keep.GetKeepServersAndSummarize(
+                       keep.GetKeepServersParams{
+                               Client: arv,
+                               Logger: arvLogger,
+                               Limit:  1000})
+
+               *readCollections = <-collectionChannel
+       }
 }
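
For illustration: the DataFetcher indirection introduced above makes this
logic easy to exercise without network i/o. A minimal sketch of a canned
fetcher (a hypothetical test helper, not part of this commit):

    package summary_test

    import (
            "git.curoverse.com/arvados.git/sdk/go/logger"
            "git.curoverse.com/arvados.git/services/datamanager/collection"
            "git.curoverse.com/arvados.git/services/datamanager/keep"
            "git.curoverse.com/arvados.git/services/datamanager/summary"
    )

    // stubFetcher returns a DataFetcher that hands back canned data
    // instead of reading from the API server and keep servers.
    func stubFetcher(rc collection.ReadCollections, ks keep.ReadServers) summary.DataFetcher {
            return func(arvLogger *logger.Logger,
                    readCollections *collection.ReadCollections,
                    keepServerInfo *keep.ReadServers) {
                    *readCollections = rc
                    *keepServerInfo = ks
            }
    }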
index 93246bc47227c345662fbdc0247d0548728c4aa6..0e3cc1d44e79ef7b533a8cccef77a7c5cf6c6605 100644 (file)
@@ -5,13 +5,15 @@ package keep
 import (
        "bufio"
        "encoding/json"
+       "errors"
        "flag"
        "fmt"
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
        "git.curoverse.com/arvados.git/sdk/go/blockdigest"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
        "git.curoverse.com/arvados.git/sdk/go/logger"
-       "git.curoverse.com/arvados.git/sdk/go/manifest"
        "git.curoverse.com/arvados.git/services/datamanager/loggerutil"
+       "io"
        "io/ioutil"
        "log"
        "net/http"
@@ -22,6 +24,7 @@ import (
 )
 
 type ServerAddress struct {
+       SSL  bool   `json:"service_ssl_flag"`
        Host string `json:"service_host"`
        Port int    `json:"service_port"`
        Uuid string `json:"uuid"`
@@ -29,20 +32,18 @@ type ServerAddress struct {
 
 // Info about a particular block returned by the server
 type BlockInfo struct {
-       Digest blockdigest.BlockDigest
-       Size   int
+       Digest blockdigest.DigestWithSize
        Mtime  int64 // TODO(misha): Replace this with a timestamp.
 }
 
 // Info about a specified block given by a server
 type BlockServerInfo struct {
        ServerIndex int
-       Size        int
        Mtime       int64 // TODO(misha): Replace this with a timestamp.
 }
 
 type ServerContents struct {
-       BlockDigestToInfo map[blockdigest.BlockDigest]BlockInfo
+       BlockDigestToInfo map[blockdigest.DigestWithSize]BlockInfo
 }
 
 type ServerResponse struct {
@@ -55,7 +56,7 @@ type ReadServers struct {
        KeepServerIndexToAddress []ServerAddress
        KeepServerAddressToIndex map[ServerAddress]int
        ServerToContents         map[ServerAddress]ServerContents
-       BlockToServers           map[blockdigest.BlockDigest][]BlockServerInfo
+       BlockToServers           map[blockdigest.DigestWithSize][]BlockServerInfo
        BlockReplicationCounts   map[int]int
 }
 
@@ -87,10 +88,18 @@ func init() {
 
 // TODO(misha): Change this to include the UUID as well.
 func (s ServerAddress) String() string {
-       return fmt.Sprintf("%s:%d", s.Host, s.Port)
+       return s.URL()
 }
 
-func getDataManagerToken(arvLogger *logger.Logger) string {
+func (s ServerAddress) URL() string {
+       if s.SSL {
+               return fmt.Sprintf("https://%s:%d", s.Host, s.Port)
+       } else {
+               return fmt.Sprintf("http://%s:%d", s.Host, s.Port)
+       }
+}
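
An illustrative sketch of the formatting URL() produces (addresses made up):

    fmt.Println(ServerAddress{SSL: true, Host: "keep0.example.org", Port: 25107}.URL())
    // https://keep0.example.org:25107
    fmt.Println(ServerAddress{Host: "keep1.example.org", Port: 25108}.URL())
    // http://keep1.example.org:25108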
+
+func GetDataManagerToken(arvLogger *logger.Logger) string {
        readDataManagerToken := func() {
                if dataManagerTokenFile == "" {
                        flag.Usage()
@@ -116,7 +125,7 @@ func GetKeepServersAndSummarize(params GetKeepServersParams) (results ReadServer
        results = GetKeepServers(params)
        log.Printf("Returned %d keep disks", len(results.ServerToContents))
 
-       ComputeBlockReplicationCounts(&results)
+       results.Summarize(params.Logger)
        log.Printf("Replication level distribution: %v",
                results.BlockReplicationCounts)
 
@@ -146,13 +155,10 @@ func GetKeepServers(params GetKeepServersParams) (results ReadServers) {
 
        if params.Logger != nil {
                params.Logger.Update(func(p map[string]interface{}, e map[string]interface{}) {
-                       keepInfo := make(map[string]interface{})
-
+                       keepInfo := logger.GetOrCreateMap(p, "keep_info")
                        keepInfo["num_keep_servers_available"] = sdkResponse.ItemsAvailable
                        keepInfo["num_keep_servers_received"] = len(sdkResponse.KeepServers)
                        keepInfo["keep_servers"] = sdkResponse.KeepServers
-
-                       p["keep_info"] = keepInfo
                })
        }
 
@@ -191,7 +197,7 @@ func GetKeepServers(params GetKeepServersParams) (results ReadServers) {
        }
 
        results.ServerToContents = make(map[ServerAddress]ServerContents)
-       results.BlockToServers = make(map[blockdigest.BlockDigest][]BlockServerInfo)
+       results.BlockToServers = make(map[blockdigest.DigestWithSize][]BlockServerInfo)
 
        // Read all the responses
        for i := range sdkResponse.KeepServers {
@@ -206,7 +212,6 @@ func GetKeepServers(params GetKeepServersParams) (results ReadServers) {
                        results.BlockToServers[blockInfo.Digest] = append(
                                results.BlockToServers[blockInfo.Digest],
                                BlockServerInfo{ServerIndex: serverIndex,
-                                       Size:  blockInfo.Size,
                                        Mtime: blockInfo.Mtime})
                }
        }
@@ -223,7 +228,10 @@ func GetServerContents(arvLogger *logger.Logger,
        resp, err := client.Do(req)
        if err != nil {
                loggerutil.FatalWithMessage(arvLogger,
-                       fmt.Sprintf("Error fetching %s: %v", req.URL.String(), err))
+                       fmt.Sprintf("Error fetching %s: %v. Response was %+v",
+                               req.URL.String(),
+                               err,
+                               resp))
        }
 
        return ReadServerResponse(arvLogger, keepServer, resp)
@@ -239,7 +247,7 @@ func GetServerStatus(arvLogger *logger.Logger,
        if arvLogger != nil {
                now := time.Now()
                arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
-                       keepInfo := p["keep_info"].(map[string]interface{})
+                       keepInfo := logger.GetOrCreateMap(p, "keep_info")
                        serverInfo := make(map[string]interface{})
                        serverInfo["status_request_sent_at"] = now
                        serverInfo["host"] = keepServer.Host
@@ -272,7 +280,7 @@ func GetServerStatus(arvLogger *logger.Logger,
        if arvLogger != nil {
                now := time.Now()
                arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
-                       keepInfo := p["keep_info"].(map[string]interface{})
+                       keepInfo := logger.GetOrCreateMap(p, "keep_info")
                        serverInfo := keepInfo[keepServer.Uuid].(map[string]interface{})
                        serverInfo["status_response_processed_at"] = now
                        serverInfo["status"] = keepStatus
@@ -288,7 +296,7 @@ func CreateIndexRequest(arvLogger *logger.Logger,
        if arvLogger != nil {
                now := time.Now()
                arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
-                       keepInfo := p["keep_info"].(map[string]interface{})
+                       keepInfo := logger.GetOrCreateMap(p, "keep_info")
                        serverInfo := keepInfo[keepServer.Uuid].(map[string]interface{})
                        serverInfo["index_request_sent_at"] = now
                })
@@ -301,7 +309,7 @@ func CreateIndexRequest(arvLogger *logger.Logger,
        }
 
        req.Header.Add("Authorization",
-               fmt.Sprintf("OAuth2 %s", getDataManagerToken(arvLogger)))
+               fmt.Sprintf("OAuth2 %s", GetDataManagerToken(arvLogger)))
        return
 }
 
@@ -319,7 +327,7 @@ func ReadServerResponse(arvLogger *logger.Logger,
        if arvLogger != nil {
                now := time.Now()
                arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
-                       keepInfo := p["keep_info"].(map[string]interface{})
+                       keepInfo := logger.GetOrCreateMap(p, "keep_info")
                        serverInfo := keepInfo[keepServer.Uuid].(map[string]interface{})
                        serverInfo["index_response_received_at"] = now
                })
@@ -327,12 +335,36 @@ func ReadServerResponse(arvLogger *logger.Logger,
 
        response.Address = keepServer
        response.Contents.BlockDigestToInfo =
-               make(map[blockdigest.BlockDigest]BlockInfo)
-       scanner := bufio.NewScanner(resp.Body)
+               make(map[blockdigest.DigestWithSize]BlockInfo)
+       reader := bufio.NewReader(resp.Body)
        numLines, numDuplicates, numSizeDisagreements := 0, 0, 0
-       for scanner.Scan() {
+       for {
                numLines++
-               blockInfo, err := parseBlockInfoFromIndexLine(scanner.Text())
+               line, err := reader.ReadString('\n')
+               if err == io.EOF {
+                       loggerutil.FatalWithMessage(arvLogger,
+                               fmt.Sprintf("Index from %s truncated at line %d",
+                                       keepServer.String(), numLines))
+               } else if err != nil {
+                       loggerutil.FatalWithMessage(arvLogger,
+                               fmt.Sprintf("Error reading index response from %s at line %d: %v",
+                                       keepServer.String(), numLines, err))
+               }
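+               // A bare newline is the server's end-of-index marker; any
+               // bytes after it mean the index response is malformed.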
+               if line == "\n" {
+                       if _, err := reader.Peek(1); err == nil {
+                               extra, _ := reader.ReadString('\n')
+                               loggerutil.FatalWithMessage(arvLogger,
+                                       fmt.Sprintf("Index from %s had trailing data at line %d after EOF marker: %s",
+                                               keepServer.String(), numLines+1, extra))
+                       } else if err != io.EOF {
+                               loggerutil.FatalWithMessage(arvLogger,
+                                       fmt.Sprintf("Index from %s had read error after EOF marker at line %d: %v",
+                                               keepServer.String(), numLines, err))
+                       }
+                       numLines--
+                       break
+               }
+               blockInfo, err := parseBlockInfoFromIndexLine(line)
                if err != nil {
                        loggerutil.FatalWithMessage(arvLogger,
                                fmt.Sprintf("Error parsing BlockInfo from index line "+
@@ -344,64 +376,33 @@ func ReadServerResponse(arvLogger *logger.Logger,
                if storedBlock, ok := response.Contents.BlockDigestToInfo[blockInfo.Digest]; ok {
                        // This server returned multiple lines containing the same block digest.
                        numDuplicates += 1
-                       if storedBlock.Size != blockInfo.Size {
-                               numSizeDisagreements += 1
-                               // TODO(misha): Consider failing here.
-                               message := fmt.Sprintf("Saw different sizes for the same block "+
-                                       "on %s: %+v %+v",
-                                       keepServer.String(),
-                                       storedBlock,
-                                       blockInfo)
-                               log.Println(message)
-                               if arvLogger != nil {
-                                       arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
-                                               keepInfo := p["keep_info"].(map[string]interface{})
-                                               serverInfo := keepInfo[keepServer.Uuid].(map[string]interface{})
-                                               var error_list []string
-                                               read_error_list, has_list := serverInfo["error_list"]
-                                               if has_list {
-                                                       error_list = read_error_list.([]string)
-                                               } // If we didn't have the list, error_list is already an empty list
-                                               serverInfo["error_list"] = append(error_list, message)
-                                       })
-                               }
-                       }
-                       // Keep the block that is bigger, or the block that's newer in
-                       // the case of a size tie.
-                       if storedBlock.Size < blockInfo.Size ||
-                               (storedBlock.Size == blockInfo.Size &&
-                                       storedBlock.Mtime < blockInfo.Mtime) {
+                       // Keep the block that's newer.
+                       if storedBlock.Mtime < blockInfo.Mtime {
                                response.Contents.BlockDigestToInfo[blockInfo.Digest] = blockInfo
                        }
                } else {
                        response.Contents.BlockDigestToInfo[blockInfo.Digest] = blockInfo
                }
        }
-       if err := scanner.Err(); err != nil {
-               loggerutil.FatalWithMessage(arvLogger,
-                       fmt.Sprintf("Received error scanning index response from %s: %v",
-                               keepServer.String(),
-                               err))
-       } else {
-               log.Printf("%s index contained %d lines with %d duplicates with "+
-                       "%d size disagreements",
-                       keepServer.String(),
-                       numLines,
-                       numDuplicates,
-                       numSizeDisagreements)
-
-               if arvLogger != nil {
-                       now := time.Now()
-                       arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
-                               keepInfo := p["keep_info"].(map[string]interface{})
-                               serverInfo := keepInfo[keepServer.Uuid].(map[string]interface{})
-
-                               serverInfo["processing_finished_at"] = now
-                               serverInfo["lines_received"] = numLines
-                               serverInfo["duplicates_seen"] = numDuplicates
-                               serverInfo["size_disagreements_seen"] = numSizeDisagreements
-                       })
-               }
+
+       log.Printf("%s index contained %d lines with %d duplicates with "+
+               "%d size disagreements",
+               keepServer.String(),
+               numLines,
+               numDuplicates,
+               numSizeDisagreements)
+
+       if arvLogger != nil {
+               now := time.Now()
+               arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
+                       keepInfo := logger.GetOrCreateMap(p, "keep_info")
+                       serverInfo := keepInfo[keepServer.Uuid].(map[string]interface{})
+
+                       serverInfo["processing_finished_at"] = now
+                       serverInfo["lines_received"] = numLines
+                       serverInfo["duplicates_seen"] = numDuplicates
+                       serverInfo["size_disagreements_seen"] = numSizeDisagreements
+               })
        }
        resp.Body.Close()
        return
@@ -415,8 +416,10 @@ func parseBlockInfoFromIndexLine(indexLine string) (blockInfo BlockInfo, err err
                        tokens)
        }
 
-       var locator manifest.BlockLocator
-       if locator, err = manifest.ParseBlockLocator(tokens[0]); err != nil {
+       var locator blockdigest.BlockLocator
+       if locator, err = blockdigest.ParseBlockLocator(tokens[0]); err != nil {
+               err = fmt.Errorf("error parsing block locator from line \"%s\": %v",
+                       indexLine, err)
                return
        }
        if len(locator.Hints) > 0 {
@@ -430,15 +433,91 @@ func parseBlockInfoFromIndexLine(indexLine string) (blockInfo BlockInfo, err err
        if err != nil {
                return
        }
-       blockInfo.Digest = locator.Digest
-       blockInfo.Size = locator.Size
+       blockInfo.Digest =
+               blockdigest.DigestWithSize{Digest: locator.Digest,
+                       Size: uint32(locator.Size)}
        return
 }
 
-func ComputeBlockReplicationCounts(readServers *ReadServers) {
+func (readServers *ReadServers) Summarize(arvLogger *logger.Logger) {
        readServers.BlockReplicationCounts = make(map[int]int)
        for _, infos := range readServers.BlockToServers {
                replication := len(infos)
                readServers.BlockReplicationCounts[replication] += 1
        }
+
+       if arvLogger != nil {
+               arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
+                       keepInfo := logger.GetOrCreateMap(p, "keep_info")
+                       keepInfo["distinct_blocks_stored"] = len(readServers.BlockToServers)
+               })
+       }
+
+}
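
To illustrate Summarize's semantics, a sketch using the blockdigest test
helper (digests and server indices invented):

    d1 := blockdigest.MakeTestDigestSpecifySize(1, 1024)
    d2 := blockdigest.MakeTestDigestSpecifySize(2, 1024)
    rs := ReadServers{BlockToServers: map[blockdigest.DigestWithSize][]BlockServerInfo{
            d1: {{ServerIndex: 0}, {ServerIndex: 1}},
            d2: {{ServerIndex: 1}, {ServerIndex: 2}},
    }}
    rs.Summarize(nil) // nil: no logger
    // rs.BlockReplicationCounts == map[int]int{2: 2}:
    // two distinct blocks, each stored on two servers.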
+
+type TrashRequest struct {
+       Locator    string `json:"locator"`
+       BlockMtime int64  `json:"block_mtime"`
+}
+
+type TrashList []TrashRequest
+
+func SendTrashLists(dataManagerToken string, kc *keepclient.KeepClient, spl map[string]TrashList) (errs []error) {
+       count := 0
+       barrier := make(chan error)
+
+       client := kc.Client
+
+       for url, v := range spl {
+               count++
+               log.Printf("Sending trash list to %v", url)
+
+               go (func(url string, v TrashList) {
+                       pipeReader, pipeWriter := io.Pipe()
+                       go (func() {
+                               enc := json.NewEncoder(pipeWriter)
+                               enc.Encode(v)
+                               pipeWriter.Close()
+                       })()
+
+                       req, err := http.NewRequest("PUT", fmt.Sprintf("%s/trash", url), pipeReader)
+                       if err != nil {
+                               log.Printf("Error creating trash list request for %v error: %v", url, err.Error())
+                               barrier <- err
+                               return
+                       }
+
+                       // Add api token header
+                       req.Header.Add("Authorization", fmt.Sprintf("OAuth2 %s", dataManagerToken))
+
+                       // Make the request
+                       var resp *http.Response
+                       if resp, err = client.Do(req); err != nil {
+                               log.Printf("Error sending trash list to %v error: %v", url, err.Error())
+                               barrier <- err
+                               return
+                       }
+
+                       log.Printf("Sent trash list to %v: response was HTTP %v", url, resp.Status)
+
+                       io.Copy(ioutil.Discard, resp.Body)
+                       resp.Body.Close()
+
+                       if resp.StatusCode != 200 {
+                               barrier <- errors.New(fmt.Sprintf("Got HTTP code %v", resp.StatusCode))
+                       } else {
+                               barrier <- nil
+                       }
+               })(url, v)
+
+       }
+
+       for i := 0; i < count; i++ {
+               b := <-barrier
+               if b != nil {
+                       errs = append(errs, b)
+               }
+       }
+
+       return errs
 }
diff --git a/services/datamanager/keep/keep_test.go b/services/datamanager/keep/keep_test.go
new file mode 100644 (file)
index 0000000..f39463e
--- /dev/null
@@ -0,0 +1,81 @@
+package keep
+
+import (
+       "encoding/json"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
+       . "gopkg.in/check.v1"
+       "net/http"
+       "net/http/httptest"
+       "testing"
+)
+
+// Gocheck boilerplate
+func Test(t *testing.T) {
+       TestingT(t)
+}
+
+type KeepSuite struct{}
+
+var _ = Suite(&KeepSuite{})
+
+type TestHandler struct {
+       request TrashList
+}
+
+func (h *TestHandler) ServeHTTP(writer http.ResponseWriter, req *http.Request) {
+       r := json.NewDecoder(req.Body)
+       r.Decode(&h.request)
+}
+
+func (s *KeepSuite) TestSendTrashLists(c *C) {
+       th := TestHandler{}
+       server := httptest.NewServer(&th)
+
+       tl := map[string]TrashList{
+               server.URL: TrashList{TrashRequest{"000000000000000000000000deadbeef", 99}}}
+
+       kc := keepclient.KeepClient{Client: &http.Client{}}
+       kc.SetServiceRoots(map[string]string{"xxxx": server.URL},
+               map[string]string{"xxxx": server.URL},
+               map[string]string{})
+
+       err := SendTrashLists("", &kc, tl)
+       server.Close()
+
+       c.Check(err, IsNil)
+
+       c.Check(th.request,
+               DeepEquals,
+               tl[server.URL])
+
+}
+
+type TestHandlerError struct {
+}
+
+func (*TestHandlerError) ServeHTTP(writer http.ResponseWriter, req *http.Request) {
+       http.Error(writer, "I'm a teapot", 418)
+}
+
+func sendTrashListError(c *C, server *httptest.Server) {
+       tl := map[string]TrashList{
+               server.URL: TrashList{TrashRequest{"000000000000000000000000deadbeef", 99}}}
+
+       kc := keepclient.KeepClient{Client: &http.Client{}}
+       kc.SetServiceRoots(map[string]string{"xxxx": server.URL},
+               map[string]string{"xxxx": server.URL},
+               map[string]string{})
+
+       err := SendTrashLists("", &kc, tl)
+
+       c.Check(err, NotNil)
+       c.Check(err[0], NotNil)
+}
+
+func (s *KeepSuite) TestSendTrashListErrorResponse(c *C) {
+       sendTrashListError(c, httptest.NewServer(&TestHandlerError{}))
+}
+
+func (s *KeepSuite) TestSendTrashListUnreachable(c *C) {
+       sendTrashListError(c, httptest.NewUnstartedServer(&TestHandler{}))
+}
index 58abb119ac128a7ba94920ad6e3a29101df8e8cc..8c655cd5ff68a981146493bfa21fd71693ba12c0 100644 (file)
@@ -16,7 +16,7 @@ func LogRunInfo(arvLogger *logger.Logger) {
        if arvLogger != nil {
                now := time.Now()
                arvLogger.Update(func(p map[string]interface{}, e map[string]interface{}) {
-                       runInfo := make(map[string]interface{})
+                       runInfo := logger.GetOrCreateMap(p, "run_info")
                        runInfo["started_at"] = now
                        runInfo["args"] = os.Args
                        hostname, err := os.Hostname()
@@ -26,26 +26,25 @@ func LogRunInfo(arvLogger *logger.Logger) {
                                runInfo["hostname"] = hostname
                        }
                        runInfo["pid"] = os.Getpid()
-                       p["run_info"] = runInfo
                })
        }
 }
 
 // A LogMutator that records the current memory usage. This is most useful as a logger write hook.
-//
-// Assumes we already have a map named "run_info" in properties. LogRunInfo() can create such a map for you if you call it.
 func LogMemoryAlloc(p map[string]interface{}, e map[string]interface{}) {
-       runInfo := p["run_info"].(map[string]interface{})
+       runInfo := logger.GetOrCreateMap(p, "run_info")
        var memStats runtime.MemStats
        runtime.ReadMemStats(&memStats)
-       runInfo["alloc_bytes_in_use"] = memStats.Alloc
+       runInfo["memory_bytes_in_use"] = memStats.Alloc
+       runInfo["memory_bytes_reserved"] = memStats.Sys
 }
 
 func FatalWithMessage(arvLogger *logger.Logger, message string) {
        if arvLogger != nil {
                arvLogger.FinalUpdate(func(p map[string]interface{}, e map[string]interface{}) {
                        p["FATAL"] = message
-                       p["run_info"].(map[string]interface{})["finished_at"] = time.Now()
+                       runInfo := logger.GetOrCreateMap(p, "run_info")
+                       runInfo["finished_at"] = time.Now()
                })
        }
 
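
The logger.GetOrCreateMap helper these hunks switch to lives in
sdk/go/logger, outside this diff; presumably it looks something like this
sketch (an assumption, not the actual SDK source):

    // GetOrCreateMap returns the nested map stored at m[key], creating
    // and storing a fresh one on first use, so callers can drop the
    // p["..."].(map[string]interface{}) type assertions.
    func GetOrCreateMap(m map[string]interface{}, key string) map[string]interface{} {
            if sub, ok := m[key].(map[string]interface{}); ok {
                    return sub
            }
            sub := make(map[string]interface{})
            m[key] = sub
            return sub
    }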
diff --git a/services/datamanager/summary/canonical_string.go b/services/datamanager/summary/canonical_string.go
new file mode 100644 (file)
index 0000000..94f0676
--- /dev/null
@@ -0,0 +1,27 @@
+// Ensures that we only have one copy of each unique string. This is
+// not designed for concurrent access.
+package summary
+
+// This code should probably be moved somewhere more universal.
+
+type CanonicalString struct {
+       m map[string]string
+}
+
+func (cs *CanonicalString) Get(s string) (r string) {
+       if cs.m == nil {
+               cs.m = make(map[string]string)
+       }
+       value, found := cs.m[s]
+       if found {
+               return value
+       }
+
+       // s may be a substring of a much larger string.
+       // If we store s, it will prevent that larger string from getting
+       // garbage collected.
+       // If this is something you worry about, you should change this code
+       // to make an explicit copy of s using a byte array.
+       cs.m[s] = s
+       return s
+}
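
A usage sketch (URLs invented): because equal inputs reuse the first stored
copy, a map keyed on canonicalized strings holds one backing copy per
distinct value, which matters when millions of index lines repeat the same
server URL.

    var cs CanonicalString
    seen := map[string]struct{}{}
    for _, u := range []string{"http://keep0:25107", "http://keep0:25107", "http://keep1:25108"} {
            seen[cs.Get(u)] = struct{}{} // equal inputs share one stored string
    }
    fmt.Println(len(seen)) // 2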
diff --git a/services/datamanager/summary/file.go b/services/datamanager/summary/file.go
new file mode 100644 (file)
index 0000000..8c37e99
--- /dev/null
@@ -0,0 +1,120 @@
+// Handles writing data to and reading data from disk to speed up development.
+
+package summary
+
+import (
+       "encoding/gob"
+       "flag"
+       "fmt"
+       "git.curoverse.com/arvados.git/sdk/go/logger"
+       "git.curoverse.com/arvados.git/services/datamanager/collection"
+       "git.curoverse.com/arvados.git/services/datamanager/keep"
+       "git.curoverse.com/arvados.git/services/datamanager/loggerutil"
+       "log"
+       "os"
+)
+
+// Used to locally cache data read from servers to reduce execution
+// time when developing. Not for use in production.
+type serializedData struct {
+       ReadCollections collection.ReadCollections
+       KeepServerInfo  keep.ReadServers
+}
+
+var (
+       writeDataTo  string
+       readDataFrom string
+)
+
+type DataFetcher func(arvLogger *logger.Logger,
+       readCollections *collection.ReadCollections,
+       keepServerInfo *keep.ReadServers)
+
+func init() {
+       flag.StringVar(&writeDataTo,
+               "write-data-to",
+               "",
+               "Write summary of data received to this file. Used for development only.")
+       flag.StringVar(&readDataFrom,
+               "read-data-from",
+               "",
+               "Avoid network i/o and read summary data from this file instead. Used for development only.")
+}
+
+// Writes data we've read to a file.
+//
+// This is useful for development, so that we don't need to read all
+// our data from the network every time we tweak something.
+//
+// This should not be used outside of development, since you'll be
+// working with stale data.
+func MaybeWriteData(arvLogger *logger.Logger,
+       readCollections collection.ReadCollections,
+       keepServerInfo keep.ReadServers) bool {
+       if writeDataTo == "" {
+               return false
+       } else {
+               summaryFile, err := os.Create(writeDataTo)
+               if err != nil {
+                       loggerutil.FatalWithMessage(arvLogger,
+                               fmt.Sprintf("Failed to open %s: %v", writeDataTo, err))
+               }
+               defer summaryFile.Close()
+
+               enc := gob.NewEncoder(summaryFile)
+               data := serializedData{
+                       ReadCollections: readCollections,
+                       KeepServerInfo:  keepServerInfo}
+               err = enc.Encode(data)
+               if err != nil {
+                       loggerutil.FatalWithMessage(arvLogger,
+                               fmt.Sprintf("Failed to write summary data: %v", err))
+               }
+               log.Printf("Wrote summary data to: %s", writeDataTo)
+               return true
+       }
+}
+
+func ShouldReadData() bool {
+       return readDataFrom != ""
+}
+
+// Reads data that we've written to a file.
+//
+// This is useful for development, so that we don't need to read all
+// our data from the network every time we tweak something.
+//
+// This should not be used outside of development, since you'll be
+// working with stale data.
+func ReadData(arvLogger *logger.Logger,
+       readCollections *collection.ReadCollections,
+       keepServerInfo *keep.ReadServers) {
+       if readDataFrom == "" {
+               loggerutil.FatalWithMessage(arvLogger,
+                       "ReadData() called with empty filename.")
+       } else {
+               summaryFile, err := os.Open(readDataFrom)
+               if err != nil {
+                       loggerutil.FatalWithMessage(arvLogger,
+                               fmt.Sprintf("Failed to open %s: %v", readDataFrom, err))
+               }
+               defer summaryFile.Close()
+
+               dec := gob.NewDecoder(summaryFile)
+               data := serializedData{}
+               err = dec.Decode(&data)
+               if err != nil {
+                       loggerutil.FatalWithMessage(arvLogger,
+                               fmt.Sprintf("Failed to read summary data: %v", err))
+               }
+
+               // re-summarize data, so that we can update our summarizing
+               // functions without needing to do all our network i/o
+               data.ReadCollections.Summarize(arvLogger)
+               data.KeepServerInfo.Summarize(arvLogger)
+
+               *readCollections = data.ReadCollections
+               *keepServerInfo = data.KeepServerInfo
+               log.Printf("Read summary data from: %s", readDataFrom)
+       }
+}
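
In practice the development loop looks like this (assuming the compiled
binary is invoked as datamanager):

    datamanager -write-data-to=cache.gob    # first run: fetch over the network, cache to disk
    datamanager -read-data-from=cache.gob   # later runs: skip network i/o, re-summarize from disk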
diff --git a/services/datamanager/summary/pull_list.go b/services/datamanager/summary/pull_list.go
new file mode 100644 (file)
index 0000000..b326c95
--- /dev/null
@@ -0,0 +1,194 @@
+// Code for generating pull lists as described in https://arvados.org/projects/arvados/wiki/Keep_Design_Doc#Pull-List
+package summary
+
+import (
+       "encoding/json"
+       "fmt"
+       "git.curoverse.com/arvados.git/sdk/go/blockdigest"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
+       "git.curoverse.com/arvados.git/sdk/go/logger"
+       "git.curoverse.com/arvados.git/services/datamanager/keep"
+       "git.curoverse.com/arvados.git/services/datamanager/loggerutil"
+       "log"
+       "os"
+       "strings"
+)
+
+type Locator blockdigest.DigestWithSize
+
+func (l Locator) MarshalJSON() ([]byte, error) {
+       return []byte("\"" + blockdigest.DigestWithSize(l).String() + "\""), nil
+}
+
+// One entry in the Pull List
+type PullRequest struct {
+       Locator Locator  `json:"locator"`
+       Servers []string `json:"servers"`
+}
+
+// The Pull List for a particular server
+type PullList []PullRequest
+
+// PullListByLocator implements sort.Interface for PullList based on
+// the Digest.
+type PullListByLocator PullList
+
+func (a PullListByLocator) Len() int      { return len(a) }
+func (a PullListByLocator) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
+func (a PullListByLocator) Less(i, j int) bool {
+       di, dj := a[i].Locator.Digest, a[j].Locator.Digest
+       if di.H < dj.H {
+               return true
+       } else if di.H == dj.H {
+               if di.L < dj.L {
+                       return true
+               } else if di.L == dj.L {
+                       return a[i].Locator.Size < a[j].Locator.Size
+               }
+       }
+       return false
+}
+
+// For a given under-replicated block, this structure represents which
+// servers should pull the specified block and which servers they can
+// pull it from.
+type PullServers struct {
+       To   []string // Servers that should pull the specified block
+       From []string // Servers that already contain the specified block
+}
+
+// Creates a map from block locator to PullServers with one entry for
+// each under-replicated block.
+//
+// Blocks with no existing replicas are skipped, since there are no
+// servers to pull them from; callers may omit such blocks, and any
+// that are provided will be ignored.
+func ComputePullServers(kc *keepclient.KeepClient,
+       keepServerInfo *keep.ReadServers,
+       blockToDesiredReplication map[blockdigest.DigestWithSize]int,
+       underReplicated BlockSet) (m map[Locator]PullServers) {
+       m = map[Locator]PullServers{}
+       // We use CanonicalString to avoid filling memory with duplicate
+       // copies of the same string.
+       var cs CanonicalString
+
+       // Servers that are writeable
+       writableServers := map[string]struct{}{}
+       for _, url := range kc.WritableLocalRoots() {
+               writableServers[cs.Get(url)] = struct{}{}
+       }
+
+       for block := range underReplicated {
+               serversStoringBlock := keepServerInfo.BlockToServers[block]
+               numCopies := len(serversStoringBlock)
+               numCopiesMissing := blockToDesiredReplication[block] - numCopies
+               if numCopiesMissing > 0 {
+                       // We expect this to always be true, since the block was listed
+                       // in underReplicated.
+
+                       if numCopies > 0 {
+                               // Not much we can do with blocks with no copies.
+
+                               // A server's host-port string appears as a key in this map
+                               // iff it contains the block.
+                               serverHasBlock := map[string]struct{}{}
+                               for _, info := range serversStoringBlock {
+                                       sa := keepServerInfo.KeepServerIndexToAddress[info.ServerIndex]
+                                       serverHasBlock[cs.Get(sa.URL())] = struct{}{}
+                               }
+
+                               roots := keepclient.NewRootSorter(kc.LocalRoots(),
+                                       block.String()).GetSortedRoots()
+
+                               l := Locator(block)
+                               m[l] = CreatePullServers(cs, serverHasBlock, writableServers,
+                                       roots, numCopiesMissing)
+                       }
+               }
+       }
+       return m
+}
+
+// Creates a pull list in which the To and From fields preserve the
+// ordering of sorted servers and the contents are all canonical
+// strings.
+func CreatePullServers(cs CanonicalString,
+       serverHasBlock map[string]struct{},
+       writableServers map[string]struct{},
+       sortedServers []string,
+       maxToFields int) (ps PullServers) {
+
+       ps = PullServers{
+               To:   make([]string, 0, maxToFields),
+               From: make([]string, 0, len(serverHasBlock)),
+       }
+
+       for _, host := range sortedServers {
+               // Use the canonical copy of the server string to avoid
+               // memory waste; entries in sortedServers include the protocol.
+               server := cs.Get(host)
+               _, hasBlock := serverHasBlock[server]
+               if hasBlock {
+                       // The from field should include the protocol.
+                       ps.From = append(ps.From, cs.Get(host))
+               } else if len(ps.To) < maxToFields {
+                       _, writable := writableServers[host]
+                       if writable {
+                               ps.To = append(ps.To, server)
+                       }
+               }
+       }
+
+       return
+}
+
+// Strips the protocol prefix from a url.
+func RemoveProtocolPrefix(url string) string {
+       return url[(strings.LastIndex(url, "/") + 1):]
+}
+
+// Produces a PullList for each keep server.
+func BuildPullLists(lps map[Locator]PullServers) (spl map[string]PullList) {
+       spl = map[string]PullList{}
+       // We don't worry about canonicalizing our strings here, because we
+       // assume lps was created by ComputePullServers() which already
+       // canonicalized the strings for us.
+       for locator, pullServers := range lps {
+               for _, destination := range pullServers.To {
+                       pullList, pullListExists := spl[destination]
+                       if !pullListExists {
+                               pullList = PullList{}
+                       }
+                       spl[destination] = append(pullList,
+                               PullRequest{Locator: locator, Servers: pullServers.From})
+               }
+       }
+       return
+}
+
+// Writes each pull list to a file.
+// The filename is based on the hostname.
+//
+// This is just a hack for prototyping; it is not expected to be used
+// in production.
+func WritePullLists(arvLogger *logger.Logger,
+       pullLists map[string]PullList) {
+       r := strings.NewReplacer(":", ".")
+       for host, list := range pullLists {
+               filename := fmt.Sprintf("pull_list.%s", r.Replace(RemoveProtocolPrefix(host)))
+               pullListFile, err := os.Create(filename)
+               if err != nil {
+                       loggerutil.FatalWithMessage(arvLogger,
+                               fmt.Sprintf("Failed to open %s: %v", filename, err))
+               }
+
+               enc := json.NewEncoder(pullListFile)
+               err = enc.Encode(list)
+               if err != nil {
+                       loggerutil.FatalWithMessage(arvLogger,
+                               fmt.Sprintf("Failed to write pull list to %s: %v", filename, err))
+               }
+               // Close explicitly rather than defer: a defer inside this loop
+               // would hold every file open until the function returns.
+               pullListFile.Close()
+               log.Printf("Wrote pull list to %s.", filename)
+       }
+}
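
A worked example of the filename derivation above (host invented):

    host := "http://keep0:25107"
    base := RemoveProtocolPrefix(host) // "keep0:25107"
    fmt.Println("pull_list." + strings.NewReplacer(":", ".").Replace(base))
    // Output: pull_list.keep0.25107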
diff --git a/services/datamanager/summary/pull_list_test.go b/services/datamanager/summary/pull_list_test.go
new file mode 100644 (file)
index 0000000..e2050c2
--- /dev/null
@@ -0,0 +1,272 @@
+package summary
+
+import (
+       "encoding/json"
+       "git.curoverse.com/arvados.git/sdk/go/blockdigest"
+       . "gopkg.in/check.v1"
+       "sort"
+       "testing"
+)
+
+// Gocheck boilerplate
+func TestPullLists(t *testing.T) {
+       TestingT(t)
+}
+
+type PullSuite struct{}
+
+var _ = Suite(&PullSuite{})
+
+// Helper function to declare string sets more succinctly.
+// Could be placed somewhere more general.
+func stringSet(slice ...string) (m map[string]struct{}) {
+       m = map[string]struct{}{}
+       for _, s := range slice {
+               m[s] = struct{}{}
+       }
+       return
+}
+
+func (s *PullSuite) TestPullListPrintsJSONCorrectly(c *C) {
+       pl := PullList{PullRequest{
+               Locator: Locator(blockdigest.MakeTestDigestSpecifySize(0xBadBeef, 56789)),
+               Servers: []string{"keep0.qr1hi.arvadosapi.com:25107",
+                       "keep1.qr1hi.arvadosapi.com:25108"}}}
+
+       b, err := json.Marshal(pl)
+       c.Assert(err, IsNil)
+       expectedOutput := `[{"locator":"0000000000000000000000000badbeef+56789",` +
+               `"servers":["keep0.qr1hi.arvadosapi.com:25107",` +
+               `"keep1.qr1hi.arvadosapi.com:25108"]}]`
+       c.Check(string(b), Equals, expectedOutput)
+}
+
+func (s *PullSuite) TestCreatePullServers(c *C) {
+       var cs CanonicalString
+       c.Check(
+               CreatePullServers(cs,
+                       stringSet(),
+                       stringSet(),
+                       []string{},
+                       5),
+               DeepEquals,
+               PullServers{To: []string{}, From: []string{}})
+
+       c.Check(
+               CreatePullServers(cs,
+                       stringSet("https://keep0:25107", "https://keep1:25108"),
+                       stringSet(),
+                       []string{},
+                       5),
+               DeepEquals,
+               PullServers{To: []string{}, From: []string{}})
+
+       c.Check(
+               CreatePullServers(cs,
+                       stringSet("https://keep0:25107", "https://keep1:25108"),
+                       stringSet("https://keep0:25107"),
+                       []string{"https://keep0:25107"},
+                       5),
+               DeepEquals,
+               PullServers{To: []string{}, From: []string{"https://keep0:25107"}})
+
+       c.Check(
+               CreatePullServers(cs,
+                       stringSet("https://keep0:25107", "https://keep1:25108"),
+                       stringSet("https://keep3:25110", "https://keep2:25109", "https://keep1:25108", "https://keep0:25107"),
+                       []string{"https://keep3:25110", "https://keep2:25109", "https://keep1:25108", "https://keep0:25107"},
+                       5),
+               DeepEquals,
+               PullServers{To: []string{"https://keep3:25110", "https://keep2:25109"},
+                       From: []string{"https://keep1:25108", "https://keep0:25107"}})
+
+       c.Check(
+               CreatePullServers(cs,
+                       stringSet("https://keep0:25107", "https://keep1:25108"),
+                       stringSet("https://keep3:25110", "https://keep1:25108", "https://keep0:25107"),
+                       []string{"https://keep3:25110", "https://keep2:25109", "https://keep1:25108", "https://keep0:25107"},
+                       5),
+               DeepEquals,
+               PullServers{To: []string{"https://keep3:25110"},
+                       From: []string{"https://keep1:25108", "https://keep0:25107"}})
+
+       c.Check(
+               CreatePullServers(cs,
+                       stringSet("https://keep0:25107", "https://keep1:25108"),
+                       stringSet("https://keep3:25110", "https://keep2:25109", "https://keep1:25108", "https://keep0:25107"),
+                       []string{"https://keep3:25110", "https://keep2:25109", "https://keep1:25108", "https://keep0:25107"},
+                       1),
+               DeepEquals,
+               PullServers{To: []string{"https://keep3:25110"},
+                       From: []string{"https://keep1:25108", "https://keep0:25107"}})
+
+       c.Check(
+               CreatePullServers(cs,
+                       stringSet("https://keep0:25107", "https://keep1:25108"),
+                       stringSet("https://keep3:25110", "https://keep2:25109",
+                               "https://keep1:25108", "https://keep0:25107"),
+                       []string{"https://keep3:25110", "https://keep2:25109",
+                               "https://keep1:25108", "https://keep0:25107"},
+                       1),
+               DeepEquals,
+               PullServers{To: []string{"https://keep3:25110"},
+                       From: []string{"https://keep1:25108", "https://keep0:25107"}})
+
+       c.Check(
+               CreatePullServers(cs,
+                       stringSet("https://keep0:25107", "https://keep1:25108"),
+                       stringSet("https://keep3:25110", "https://keep2:25109", "https://keep1:25108", "https://keep0:25107"),
+                       []string{"https://keep3:25110", "https://keep2:25109", "https://keep1:25108", "https://keep0:25107"},
+                       0),
+               DeepEquals,
+               PullServers{To: []string{},
+                       From: []string{"https://keep1:25108", "https://keep0:25107"}})
+}
+
+// Checks whether two pull list maps are equal. Since pull lists are
+// ordered arbitrarily, we need to sort them by digest before
+// comparing them for deep equality.
+type pullListMapEqualsChecker struct {
+       *CheckerInfo
+}
+
+func (c *pullListMapEqualsChecker) Check(params []interface{}, names []string) (result bool, error string) {
+       obtained, ok := params[0].(map[string]PullList)
+       if !ok {
+               return false, "First parameter is not a PullList map"
+       }
+       expected, ok := params[1].(map[string]PullList)
+       if !ok {
+               return false, "Second parameter is not a PullList map"
+       }
+
+       for _, v := range obtained {
+               sort.Sort(PullListByLocator(v))
+       }
+       for _, v := range expected {
+               sort.Sort(PullListByLocator(v))
+       }
+
+       return DeepEquals.Check(params, names)
+}
+
+var PullListMapEquals Checker = &pullListMapEqualsChecker{&CheckerInfo{
+       Name:   "PullListMapEquals",
+       Params: []string{"obtained", "expected"},
+}}
+
+func (s *PullSuite) TestBuildPullLists(c *C) {
+       c.Check(
+               BuildPullLists(map[Locator]PullServers{}),
+               PullListMapEquals,
+               map[string]PullList{})
+
+       locator1 := Locator{Digest: blockdigest.MakeTestBlockDigest(0xBadBeef)}
+       c.Check(
+               BuildPullLists(map[Locator]PullServers{
+                       locator1: PullServers{To: []string{}, From: []string{}}}),
+               PullListMapEquals,
+               map[string]PullList{})
+
+       c.Check(
+               BuildPullLists(map[Locator]PullServers{
+                       locator1: PullServers{To: []string{}, From: []string{"f1", "f2"}}}),
+               PullListMapEquals,
+               map[string]PullList{})
+
+       c.Check(
+               BuildPullLists(map[Locator]PullServers{
+                       locator1: PullServers{To: []string{"t1"}, From: []string{"f1", "f2"}}}),
+               PullListMapEquals,
+               map[string]PullList{
+                       "t1": PullList{PullRequest{locator1, []string{"f1", "f2"}}}})
+
+       c.Check(
+               BuildPullLists(map[Locator]PullServers{
+                       locator1: PullServers{To: []string{"t1"}, From: []string{}}}),
+               PullListMapEquals,
+               map[string]PullList{"t1": PullList{
+                       PullRequest{locator1, []string{}}}})
+
+       c.Check(
+               BuildPullLists(map[Locator]PullServers{
+                       locator1: PullServers{
+                               To:   []string{"t1", "t2"},
+                               From: []string{"f1", "f2"},
+                       }}),
+               PullListMapEquals,
+               map[string]PullList{
+                       "t1": PullList{PullRequest{locator1, []string{"f1", "f2"}}},
+                       "t2": PullList{PullRequest{locator1, []string{"f1", "f2"}}},
+               })
+
+       locator2 := Locator{Digest: blockdigest.MakeTestBlockDigest(0xCabbed)}
+       c.Check(
+               BuildPullLists(map[Locator]PullServers{
+                       locator1: PullServers{To: []string{"t1"}, From: []string{"f1", "f2"}},
+                       locator2: PullServers{To: []string{"t2"}, From: []string{"f3", "f4"}}}),
+               PullListMapEquals,
+               map[string]PullList{
+                       "t1": PullList{PullRequest{locator1, []string{"f1", "f2"}}},
+                       "t2": PullList{PullRequest{locator2, []string{"f3", "f4"}}},
+               })
+
+       c.Check(
+               BuildPullLists(map[Locator]PullServers{
+                       locator1: PullServers{
+                               To:   []string{"t1"},
+                               From: []string{"f1", "f2"}},
+                       locator2: PullServers{
+                               To:   []string{"t2", "t1"},
+                               From: []string{"f3", "f4"}},
+               }),
+               PullListMapEquals,
+               map[string]PullList{
+                       "t1": PullList{
+                               PullRequest{locator1, []string{"f1", "f2"}},
+                               PullRequest{locator2, []string{"f3", "f4"}},
+                       },
+                       "t2": PullList{
+                               PullRequest{locator2, []string{"f3", "f4"}},
+                       },
+               })
+
+       locator3 := Locator{Digest: blockdigest.MakeTestBlockDigest(0xDeadBeef)}
+       locator4 := Locator{Digest: blockdigest.MakeTestBlockDigest(0xFedBeef)}
+       c.Check(
+               BuildPullLists(map[Locator]PullServers{
+                       locator1: PullServers{
+                               To:   []string{"t1"},
+                               From: []string{"f1", "f2"}},
+                       locator2: PullServers{
+                               To:   []string{"t2", "t1"},
+                               From: []string{"f3", "f4"}},
+                       locator3: PullServers{
+                               To:   []string{"t3", "t2", "t1"},
+                               From: []string{"f4", "f5"}},
+                       locator4: PullServers{
+                               To:   []string{"t4", "t3", "t2", "t1"},
+                               From: []string{"f1", "f5"}},
+               }),
+               PullListMapEquals,
+               map[string]PullList{
+                       "t1": PullList{
+                               PullRequest{locator1, []string{"f1", "f2"}},
+                               PullRequest{locator2, []string{"f3", "f4"}},
+                               PullRequest{locator3, []string{"f4", "f5"}},
+                               PullRequest{locator4, []string{"f1", "f5"}},
+                       },
+                       "t2": PullList{
+                               PullRequest{locator2, []string{"f3", "f4"}},
+                               PullRequest{locator3, []string{"f4", "f5"}},
+                               PullRequest{locator4, []string{"f1", "f5"}},
+                       },
+                       "t3": PullList{
+                               PullRequest{locator3, []string{"f4", "f5"}},
+                               PullRequest{locator4, []string{"f1", "f5"}},
+                       },
+                       "t4": PullList{
+                               PullRequest{locator4, []string{"f1", "f5"}},
+                       },
+               })
+}
diff --git a/services/datamanager/summary/summary.go b/services/datamanager/summary/summary.go
new file mode 100644 (file)
index 0000000..edd760b
--- /dev/null
@@ -0,0 +1,267 @@
+// Summarizes Collection Data and Keep Server Contents.
+package summary
+
+// TODO(misha): Check size of blocks as well as their digest.
+
+import (
+       "fmt"
+       "git.curoverse.com/arvados.git/sdk/go/blockdigest"
+       "git.curoverse.com/arvados.git/services/datamanager/collection"
+       "git.curoverse.com/arvados.git/services/datamanager/keep"
+       "sort"
+)
+
+type BlockSet map[blockdigest.DigestWithSize]struct{}
+
+// Adds a single block to the set.
+func (bs BlockSet) Insert(digest blockdigest.DigestWithSize) {
+       bs[digest] = struct{}{}
+}
+
+// Adds a set of blocks to the set.
+func (bs BlockSet) Union(obs BlockSet) {
+       for k, v := range obs {
+               bs[k] = v
+       }
+}
+
+// We use the collection index to save space. To convert to and from
+// the uuid, use collection.ReadCollections' fields
+// CollectionIndexToUuid and CollectionUuidToIndex.
+type CollectionIndexSet map[int]struct{}
+
+// Adds a single collection to the set. The collection is specified by
+// its index.
+func (cis CollectionIndexSet) Insert(collectionIndex int) {
+       cis[collectionIndex] = struct{}{}
+}
+
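+// Computes the set of collections (expressed as collection indices)
+// that reference at least one block in bs.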
+func (bs BlockSet) ToCollectionIndexSet(
+       readCollections collection.ReadCollections,
+       collectionIndexSet *CollectionIndexSet) {
+       for block := range bs {
+               for _, collectionIndex := range readCollections.BlockToCollectionIndices[block] {
+                       collectionIndexSet.Insert(collectionIndex)
+               }
+       }
+}
+
+// Keeps track of the requested and actual replication levels.
+// Currently this is only used for blocks but could easily be used for
+// collections as well.
+type ReplicationLevels struct {
+       // The requested replication level.
+       // For Blocks this is the maximum replication level among all the
+       // collections this block belongs to.
+       Requested int
+
+       // The actual number of keep servers this is on.
+       Actual int
+}
+
+// Maps from replication levels to their blocks.
+type ReplicationLevelBlockSetMap map[ReplicationLevels]BlockSet
+
+// An individual entry from ReplicationLevelBlockSetMap which only reports the number of blocks, not which blocks.
+type ReplicationLevelBlockCount struct {
+       Levels ReplicationLevels
+       Count  int
+}
+
+// An ordered list of ReplicationLevelBlockCount useful for reporting.
+type ReplicationLevelBlockSetSlice []ReplicationLevelBlockCount
+
+type ReplicationSummary struct {
+       CollectionBlocksNotInKeep  BlockSet
+       UnderReplicatedBlocks      BlockSet
+       OverReplicatedBlocks       BlockSet
+       CorrectlyReplicatedBlocks  BlockSet
+       KeepBlocksNotInCollections BlockSet
+
+       CollectionsNotFullyInKeep      CollectionIndexSet
+       UnderReplicatedCollections     CollectionIndexSet
+       OverReplicatedCollections      CollectionIndexSet
+       CorrectlyReplicatedCollections CollectionIndexSet
+}
+
+// This struct counts the elements in each set in ReplicationSummary.
+type ReplicationSummaryCounts struct {
+       CollectionBlocksNotInKeep      int
+       UnderReplicatedBlocks          int
+       OverReplicatedBlocks           int
+       CorrectlyReplicatedBlocks      int
+       KeepBlocksNotInCollections     int
+       CollectionsNotFullyInKeep      int
+       UnderReplicatedCollections     int
+       OverReplicatedCollections      int
+       CorrectlyReplicatedCollections int
+}
+
+// Gets the BlockSet for a given ReplicationLevels value, creating it
+// if it doesn't already exist.
+func (rlbs ReplicationLevelBlockSetMap) GetOrCreate(
+       repLevels ReplicationLevels) (bs BlockSet) {
+       bs, exists := rlbs[repLevels]
+       if !exists {
+               bs = make(BlockSet)
+               rlbs[repLevels] = bs
+       }
+       return
+}
+
+// Adds a block to the set for a given replication level.
+func (rlbs ReplicationLevelBlockSetMap) Insert(
+       repLevels ReplicationLevels,
+       block blockdigest.DigestWithSize) {
+       rlbs.GetOrCreate(repLevels).Insert(block)
+}
+
+// Adds a set of blocks to the set for a given replication level.
+func (rlbs ReplicationLevelBlockSetMap) Union(
+       repLevels ReplicationLevels,
+       bs BlockSet) {
+       rlbs.GetOrCreate(repLevels).Union(bs)
+}
+
+// Outputs a sorted list of ReplicationLevelBlockCounts.
+func (rlbs ReplicationLevelBlockSetMap) Counts() (
+       sorted ReplicationLevelBlockSetSlice) {
+       sorted = make(ReplicationLevelBlockSetSlice, len(rlbs))
+       i := 0
+       for levels, set := range rlbs {
+               sorted[i] = ReplicationLevelBlockCount{Levels: levels, Count: len(set)}
+               i++
+       }
+       sort.Sort(sorted)
+       return
+}
+
+// Implemented to meet sort.Interface
+func (rlbss ReplicationLevelBlockSetSlice) Len() int {
+       return len(rlbss)
+}
+
+// Implemented to meet sort.Interface
+func (rlbss ReplicationLevelBlockSetSlice) Less(i, j int) bool {
+       return rlbss[i].Levels.Requested < rlbss[j].Levels.Requested ||
+               (rlbss[i].Levels.Requested == rlbss[j].Levels.Requested &&
+                       rlbss[i].Levels.Actual < rlbss[j].Levels.Actual)
+}
+
+// Implemented to meet sort.Interface
+func (rlbss ReplicationLevelBlockSetSlice) Swap(i, j int) {
+       rlbss[i], rlbss[j] = rlbss[j], rlbss[i]
+}
+
+func (rs ReplicationSummary) ComputeCounts() (rsc ReplicationSummaryCounts) {
+       // TODO(misha): Consider rewriting this method to iterate through
+       // the fields using reflection, instead of explicitly listing the
+       // fields as we do now.
+       rsc.CollectionBlocksNotInKeep = len(rs.CollectionBlocksNotInKeep)
+       rsc.UnderReplicatedBlocks = len(rs.UnderReplicatedBlocks)
+       rsc.OverReplicatedBlocks = len(rs.OverReplicatedBlocks)
+       rsc.CorrectlyReplicatedBlocks = len(rs.CorrectlyReplicatedBlocks)
+       rsc.KeepBlocksNotInCollections = len(rs.KeepBlocksNotInCollections)
+       rsc.CollectionsNotFullyInKeep = len(rs.CollectionsNotFullyInKeep)
+       rsc.UnderReplicatedCollections = len(rs.UnderReplicatedCollections)
+       rsc.OverReplicatedCollections = len(rs.OverReplicatedCollections)
+       rsc.CorrectlyReplicatedCollections = len(rs.CorrectlyReplicatedCollections)
+       return rsc
+}
+
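+// PrettyPrint returns a multi-line, human-readable report of the counts.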
+func (rsc ReplicationSummaryCounts) PrettyPrint() string {
+       return fmt.Sprintf("Replication Block Counts:"+
+               "\n Missing From Keep: %d, "+
+               "\n Under Replicated: %d, "+
+               "\n Over Replicated: %d, "+
+               "\n Replicated Just Right: %d, "+
+               "\n Not In Any Collection: %d. "+
+               "\nReplication Collection Counts:"+
+               "\n Missing From Keep: %d, "+
+               "\n Under Replicated: %d, "+
+               "\n Over Replicated: %d, "+
+               "\n Replicated Just Right: %d.",
+               rsc.CollectionBlocksNotInKeep,
+               rsc.UnderReplicatedBlocks,
+               rsc.OverReplicatedBlocks,
+               rsc.CorrectlyReplicatedBlocks,
+               rsc.KeepBlocksNotInCollections,
+               rsc.CollectionsNotFullyInKeep,
+               rsc.UnderReplicatedCollections,
+               rsc.OverReplicatedCollections,
+               rsc.CorrectlyReplicatedCollections)
+}
+
+func BucketReplication(readCollections collection.ReadCollections,
+       keepServerInfo keep.ReadServers) (rlbsm ReplicationLevelBlockSetMap) {
+       rlbsm = make(ReplicationLevelBlockSetMap)
+
+       for block, requestedReplication := range readCollections.BlockToDesiredReplication {
+               rlbsm.Insert(
+                       ReplicationLevels{
+                               Requested: requestedReplication,
+                               Actual:    len(keepServerInfo.BlockToServers[block])},
+                       block)
+       }
+
+       for block, servers := range keepServerInfo.BlockToServers {
+               if readCollections.BlockToDesiredReplication[block] == 0 {
+                       rlbsm.Insert(
+                               ReplicationLevels{Requested: 0, Actual: len(servers)},
+                               block)
+               }
+       }
+       return
+}
+
+func (rlbsm ReplicationLevelBlockSetMap) SummarizeBuckets(
+       readCollections collection.ReadCollections) (
+       rs ReplicationSummary) {
+       rs.CollectionBlocksNotInKeep = make(BlockSet)
+       rs.UnderReplicatedBlocks = make(BlockSet)
+       rs.OverReplicatedBlocks = make(BlockSet)
+       rs.CorrectlyReplicatedBlocks = make(BlockSet)
+       rs.KeepBlocksNotInCollections = make(BlockSet)
+
+       rs.CollectionsNotFullyInKeep = make(CollectionIndexSet)
+       rs.UnderReplicatedCollections = make(CollectionIndexSet)
+       rs.OverReplicatedCollections = make(CollectionIndexSet)
+       rs.CorrectlyReplicatedCollections = make(CollectionIndexSet)
+
+       for levels, bs := range rlbsm {
+               if levels.Actual == 0 {
+                       rs.CollectionBlocksNotInKeep.Union(bs)
+               } else if levels.Requested == 0 {
+                       rs.KeepBlocksNotInCollections.Union(bs)
+               } else if levels.Actual < levels.Requested {
+                       rs.UnderReplicatedBlocks.Union(bs)
+               } else if levels.Actual > levels.Requested {
+                       rs.OverReplicatedBlocks.Union(bs)
+               } else {
+                       rs.CorrectlyReplicatedBlocks.Union(bs)
+               }
+       }
+
+       rs.CollectionBlocksNotInKeep.ToCollectionIndexSet(readCollections,
+               &rs.CollectionsNotFullyInKeep)
+       // Since different collections can specify different replication
+       // levels, the fact that a block is under-replicated does not imply
+       // that all collections that it belongs to are under-replicated, but
+       // we'll ignore that for now.
+       // TODO(misha): Fix this and report the correct set of collections.
+       rs.UnderReplicatedBlocks.ToCollectionIndexSet(readCollections,
+               &rs.UnderReplicatedCollections)
+       rs.OverReplicatedBlocks.ToCollectionIndexSet(readCollections,
+               &rs.OverReplicatedCollections)
+
+       for i := range readCollections.CollectionIndexToUuid {
+               _, notInKeep := rs.CollectionsNotFullyInKeep[i]
+               _, underReplicated := rs.UnderReplicatedCollections[i]
+               _, overReplicated := rs.OverReplicatedCollections[i]
+               if !notInKeep && !underReplicated && !overReplicated {
+                       rs.CorrectlyReplicatedCollections.Insert(i)
+               }
+       }
+
+       return
+}
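+
+// End-to-end, the pieces above are meant to compose as in this sketch
+// (assuming readCollections and keepServerInfo were produced by the
+// collection and keep readers; only the first two calls are exercised
+// by the tests in this change):
+//
+//     buckets := BucketReplication(readCollections, keepServerInfo)
+//     summary := buckets.SummarizeBuckets(readCollections)
+//     fmt.Println(summary.ComputeCounts().PrettyPrint())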
diff --git a/services/datamanager/summary/summary_test.go b/services/datamanager/summary/summary_test.go
new file mode 100644 (file)
index 0000000..ea76df4
--- /dev/null
@@ -0,0 +1,220 @@
+package summary
+
+import (
+       "git.curoverse.com/arvados.git/sdk/go/blockdigest"
+       "git.curoverse.com/arvados.git/services/datamanager/collection"
+       "git.curoverse.com/arvados.git/services/datamanager/keep"
+       "reflect"
+       "sort"
+       "testing"
+)
+
+func BlockSetFromSlice(digests []int) (bs BlockSet) {
+       bs = make(BlockSet)
+       for _, digest := range digests {
+               bs.Insert(blockdigest.MakeTestDigestWithSize(digest))
+       }
+       return
+}
+
+func CollectionIndexSetFromSlice(indices []int) (cis CollectionIndexSet) {
+       cis = make(CollectionIndexSet)
+       for _, index := range indices {
+               cis.Insert(index)
+       }
+       return
+}
+
+func (cis CollectionIndexSet) ToSlice() (ints []int) {
+       ints = make([]int, len(cis))
+       i := 0
+       for collectionIndex := range cis {
+               ints[i] = collectionIndex
+               i++
+       }
+       sort.Ints(ints)
+       return
+}
+
+// SummarizeReplication is a helper that preserves the interface
+// expected by older tests.
+func SummarizeReplication(readCollections collection.ReadCollections,
+       keepServerInfo keep.ReadServers) (rs ReplicationSummary) {
+       return BucketReplication(readCollections, keepServerInfo).
+               SummarizeBuckets(readCollections)
+}
+
+// SpecifyReplication takes a map from block digest to replication level
+// and represents it in a keep.ReadServers structure.
+func SpecifyReplication(digestToReplication map[int]int) (rs keep.ReadServers) {
+       rs.BlockToServers = make(map[blockdigest.DigestWithSize][]keep.BlockServerInfo)
+       for digest, replication := range digestToReplication {
+               rs.BlockToServers[blockdigest.MakeTestDigestWithSize(digest)] =
+                       make([]keep.BlockServerInfo, replication)
+       }
+       return
+}
+
+// Verifies that
+// blocks.ToCollectionIndexSet(rc.BlockToCollectionIndices) returns
+// expectedCollections.
+func VerifyToCollectionIndexSet(
+       t *testing.T,
+       blocks []int,
+       blockToCollectionIndices map[int][]int,
+       expectedCollections []int) {
+
+       expected := CollectionIndexSetFromSlice(expectedCollections)
+
+       rc := collection.ReadCollections{
+               BlockToCollectionIndices: map[blockdigest.DigestWithSize][]int{},
+       }
+       for digest, indices := range blockToCollectionIndices {
+               rc.BlockToCollectionIndices[blockdigest.MakeTestDigestWithSize(digest)] = indices
+       }
+
+       returned := make(CollectionIndexSet)
+       BlockSetFromSlice(blocks).ToCollectionIndexSet(rc, &returned)
+
+       if !reflect.DeepEqual(returned, expected) {
+               t.Errorf("Expected %v.ToCollectionIndexSet(%v) to return \n %v \n but instead received \n %v",
+                       blocks,
+                       blockToCollectionIndices,
+                       expectedCollections,
+                       returned.ToSlice())
+       }
+}
+
+func TestToCollectionIndexSet(t *testing.T) {
+       VerifyToCollectionIndexSet(t, []int{6}, map[int][]int{6: []int{0}}, []int{0})
+       VerifyToCollectionIndexSet(t, []int{4}, map[int][]int{4: []int{1}}, []int{1})
+       VerifyToCollectionIndexSet(t, []int{4}, map[int][]int{4: []int{1, 9}}, []int{1, 9})
+       VerifyToCollectionIndexSet(t, []int{5, 6},
+               map[int][]int{5: []int{2, 3}, 6: []int{3, 4}},
+               []int{2, 3, 4})
+       VerifyToCollectionIndexSet(t, []int{5, 6},
+               map[int][]int{5: []int{8}, 6: []int{4}},
+               []int{4, 8})
+       VerifyToCollectionIndexSet(t, []int{6}, map[int][]int{5: []int{0}}, []int{})
+}
+
+func TestSimpleSummary(t *testing.T) {
+       rc := collection.MakeTestReadCollections([]collection.TestCollectionSpec{
+               collection.TestCollectionSpec{ReplicationLevel: 1, Blocks: []int{1, 2}},
+       })
+       rc.Summarize(nil)
+       cIndex := rc.CollectionIndicesForTesting()
+
+       keepInfo := SpecifyReplication(map[int]int{1: 1, 2: 1})
+
+       expectedSummary := ReplicationSummary{
+               CollectionBlocksNotInKeep:  BlockSet{},
+               UnderReplicatedBlocks:      BlockSet{},
+               OverReplicatedBlocks:       BlockSet{},
+               CorrectlyReplicatedBlocks:  BlockSetFromSlice([]int{1, 2}),
+               KeepBlocksNotInCollections: BlockSet{},
+
+               CollectionsNotFullyInKeep:      CollectionIndexSet{},
+               UnderReplicatedCollections:     CollectionIndexSet{},
+               OverReplicatedCollections:      CollectionIndexSet{},
+               CorrectlyReplicatedCollections: CollectionIndexSetFromSlice([]int{cIndex[0]}),
+       }
+
+       returnedSummary := SummarizeReplication(rc, keepInfo)
+
+       if !reflect.DeepEqual(returnedSummary, expectedSummary) {
+               t.Fatalf("Expected returnedSummary to look like %+v but instead it is %+v", expectedSummary, returnedSummary)
+       }
+}
+
+func TestMissingBlock(t *testing.T) {
+       rc := collection.MakeTestReadCollections([]collection.TestCollectionSpec{
+               collection.TestCollectionSpec{ReplicationLevel: 1, Blocks: []int{1, 2}},
+       })
+       rc.Summarize(nil)
+       cIndex := rc.CollectionIndicesForTesting()
+
+       keepInfo := SpecifyReplication(map[int]int{1: 1})
+
+       expectedSummary := ReplicationSummary{
+               CollectionBlocksNotInKeep:  BlockSetFromSlice([]int{2}),
+               UnderReplicatedBlocks:      BlockSet{},
+               OverReplicatedBlocks:       BlockSet{},
+               CorrectlyReplicatedBlocks:  BlockSetFromSlice([]int{1}),
+               KeepBlocksNotInCollections: BlockSet{},
+
+               CollectionsNotFullyInKeep:      CollectionIndexSetFromSlice([]int{cIndex[0]}),
+               UnderReplicatedCollections:     CollectionIndexSet{},
+               OverReplicatedCollections:      CollectionIndexSet{},
+               CorrectlyReplicatedCollections: CollectionIndexSet{},
+       }
+
+       returnedSummary := SummarizeReplication(rc, keepInfo)
+
+       if !reflect.DeepEqual(returnedSummary, expectedSummary) {
+               t.Fatalf("Expected returnedSummary to look like %+v but instead it is %+v",
+                       expectedSummary,
+                       returnedSummary)
+       }
+}
+
+func TestUnderAndOverReplicatedBlocks(t *testing.T) {
+       rc := collection.MakeTestReadCollections([]collection.TestCollectionSpec{
+               collection.TestCollectionSpec{ReplicationLevel: 2, Blocks: []int{1, 2}},
+       })
+       rc.Summarize(nil)
+       cIndex := rc.CollectionIndicesForTesting()
+
+       keepInfo := SpecifyReplication(map[int]int{1: 1, 2: 3})
+
+       expectedSummary := ReplicationSummary{
+               CollectionBlocksNotInKeep:  BlockSet{},
+               UnderReplicatedBlocks:      BlockSetFromSlice([]int{1}),
+               OverReplicatedBlocks:       BlockSetFromSlice([]int{2}),
+               CorrectlyReplicatedBlocks:  BlockSet{},
+               KeepBlocksNotInCollections: BlockSet{},
+
+               CollectionsNotFullyInKeep:      CollectionIndexSet{},
+               UnderReplicatedCollections:     CollectionIndexSetFromSlice([]int{cIndex[0]}),
+               OverReplicatedCollections:      CollectionIndexSetFromSlice([]int{cIndex[0]}),
+               CorrectlyReplicatedCollections: CollectionIndexSet{},
+       }
+
+       returnedSummary := SummarizeReplication(rc, keepInfo)
+
+       if !reflect.DeepEqual(returnedSummary, expectedSummary) {
+               t.Fatalf("Expected returnedSummary to look like %+v but instead it is %+v",
+                       expectedSummary,
+                       returnedSummary)
+       }
+}
+
+func TestMixedReplication(t *testing.T) {
+       rc := collection.MakeTestReadCollections([]collection.TestCollectionSpec{
+               collection.TestCollectionSpec{ReplicationLevel: 1, Blocks: []int{1, 2}},
+               collection.TestCollectionSpec{ReplicationLevel: 1, Blocks: []int{3, 4}},
+               collection.TestCollectionSpec{ReplicationLevel: 2, Blocks: []int{5, 6}},
+       })
+       rc.Summarize(nil)
+       cIndex := rc.CollectionIndicesForTesting()
+
+       keepInfo := SpecifyReplication(map[int]int{1: 1, 2: 1, 3: 1, 5: 1, 6: 3, 7: 2})
+
+       expectedSummary := ReplicationSummary{
+               CollectionBlocksNotInKeep:  BlockSetFromSlice([]int{4}),
+               UnderReplicatedBlocks:      BlockSetFromSlice([]int{5}),
+               OverReplicatedBlocks:       BlockSetFromSlice([]int{6}),
+               CorrectlyReplicatedBlocks:  BlockSetFromSlice([]int{1, 2, 3}),
+               KeepBlocksNotInCollections: BlockSetFromSlice([]int{7}),
+
+               CollectionsNotFullyInKeep:      CollectionIndexSetFromSlice([]int{cIndex[1]}),
+               UnderReplicatedCollections:     CollectionIndexSetFromSlice([]int{cIndex[2]}),
+               OverReplicatedCollections:      CollectionIndexSetFromSlice([]int{cIndex[2]}),
+               CorrectlyReplicatedCollections: CollectionIndexSetFromSlice([]int{cIndex[0]}),
+       }
+
+       returnedSummary := SummarizeReplication(rc, keepInfo)
+
+       if !reflect.DeepEqual(returnedSummary, expectedSummary) {
+               t.Fatalf("Expected returnedSummary to look like: \n%+v but instead it is: \n%+v. Index to UUID is %v. BlockToCollectionIndices is %v.", expectedSummary, returnedSummary, rc.CollectionIndexToUuid, rc.BlockToCollectionIndices)
+       }
+}
diff --git a/services/datamanager/summary/trash_list.go b/services/datamanager/summary/trash_list.go
new file mode 100644 (file)
index 0000000..0bedc9c
--- /dev/null
@@ -0,0 +1,60 @@
+// Code for generating trash lists of unreferenced, expired blocks.
+package summary
+
+import (
+       "fmt"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
+       "git.curoverse.com/arvados.git/services/datamanager/keep"
+       "time"
+)
+
+func BuildTrashLists(kc *keepclient.KeepClient,
+       keepServerInfo *keep.ReadServers,
+       keepBlocksNotInCollections BlockSet) (m map[string]keep.TrashList, err error) {
+
+       // Servers that are writeable
+       // Servers that are writable.
+       for _, url := range kc.WritableLocalRoots() {
+               writableServers[url] = struct{}{}
+       }
+
+       ttlRaw, err := kc.Arvados.Discovery("blobSignatureTtl")
+       if err != nil {
+               return nil, fmt.Errorf("Failed to get blobSignatureTtl, can't build trash lists: %v", err)
+       }
+
+       ttl := int64(ttlRaw.(float64))
+
+       // Expire unreferenced blocks that are more than "ttl" seconds old.
+       expiry := time.Now().UTC().Unix() - ttl
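+       // For example, with a blobSignatureTtl of two weeks (1209600
+       // seconds, a common default, though the real value comes from the
+       // API discovery document), any unreferenced block whose Mtime is
+       // more than two weeks in the past falls before this cutoff.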
+
+       return buildTrashListsInternal(writableServers, keepServerInfo, expiry, keepBlocksNotInCollections), nil
+}
+
+func buildTrashListsInternal(writableServers map[string]struct{},
+       keepServerInfo *keep.ReadServers,
+       expiry int64,
+       keepBlocksNotInCollections BlockSet) (m map[string]keep.TrashList) {
+
+       m = make(map[string]keep.TrashList)
+
+       for block := range keepBlocksNotInCollections {
+               for _, blockOnServer := range keepServerInfo.BlockToServers[block] {
+                       if blockOnServer.Mtime >= expiry {
+                               continue
+                       }
+
+                       // This block is older than the expiry cutoff.
+                       srv := keepServerInfo.KeepServerIndexToAddress[blockOnServer.ServerIndex].String()
+
+                       if _, writable := writableServers[srv]; !writable {
+                               continue
+                       }
+
+                       m[srv] = append(m[srv], keep.TrashRequest{Locator: block.Digest.String(), BlockMtime: blockOnServer.Mtime})
+               }
+       }
+       return
+}
diff --git a/services/datamanager/summary/trash_list_test.go b/services/datamanager/summary/trash_list_test.go
new file mode 100644 (file)
index 0000000..7620631
--- /dev/null
@@ -0,0 +1,76 @@
+package summary
+
+import (
+       "git.curoverse.com/arvados.git/sdk/go/blockdigest"
+       "git.curoverse.com/arvados.git/services/datamanager/keep"
+       . "gopkg.in/check.v1"
+       "testing"
+)
+
+// Gocheck boilerplate
+func TestTrash(t *testing.T) {
+       TestingT(t)
+}
+
+type TrashSuite struct{}
+
+var _ = Suite(&TrashSuite{})
+
+func (s *TrashSuite) TestBuildTrashLists(c *C) {
+       var sv0 = keep.ServerAddress{Host: "keep0.example.com", Port: 80}
+       var sv1 = keep.ServerAddress{Host: "keep1.example.com", Port: 80}
+
+       var block0 = blockdigest.MakeTestDigestWithSize(0xdeadbeef)
+       var block1 = blockdigest.MakeTestDigestWithSize(0xfedbeef)
+
+       var keepServerInfo = keep.ReadServers{
+               KeepServerIndexToAddress: []keep.ServerAddress{sv0, sv1},
+               BlockToServers: map[blockdigest.DigestWithSize][]keep.BlockServerInfo{
+                       block0: []keep.BlockServerInfo{
+                               keep.BlockServerInfo{0, 99},
+                               keep.BlockServerInfo{1, 101}},
+                       block1: []keep.BlockServerInfo{
+                               keep.BlockServerInfo{0, 99},
+                               keep.BlockServerInfo{1, 101}}}}
+
+       // only block0 is in delete set
+       var bs BlockSet = make(BlockSet)
+       bs[block0] = struct{}{}
+
+       // Test trash list where only sv0 is on writable list.
+       c.Check(buildTrashListsInternal(
+               map[string]struct{}{
+                       sv0.URL(): struct{}{}},
+               &keepServerInfo,
+               110,
+               bs),
+               DeepEquals,
+               map[string]keep.TrashList{
+                       "http://keep0.example.com:80": keep.TrashList{keep.TrashRequest{"000000000000000000000000deadbeef", 99}}})
+
+       // Test trash list where both sv0 and sv1 are on writable list.
+       c.Check(buildTrashListsInternal(
+               map[string]struct{}{
+                       sv0.URL(): struct{}{},
+                       sv1.URL(): struct{}{}},
+               &keepServerInfo,
+               110,
+               bs),
+               DeepEquals,
+               map[string]keep.TrashList{
+                       "http://keep0.example.com:80": keep.TrashList{keep.TrashRequest{"000000000000000000000000deadbeef", 99}},
+                       "http://keep1.example.com:80": keep.TrashList{keep.TrashRequest{"000000000000000000000000deadbeef", 101}}})
+
+       // Test trash list where only block on sv0 is expired
+       c.Check(buildTrashListsInternal(
+               map[string]struct{}{
+                       sv0.URL(): struct{}{},
+                       sv1.URL(): struct{}{}},
+               &keepServerInfo,
+               100,
+               bs),
+               DeepEquals,
+               map[string]keep.TrashList{
+                       "http://keep0.example.com:80": keep.TrashList{keep.TrashRequest{"000000000000000000000000deadbeef", 99}}})
+
+}
diff --git a/services/dockercleaner/.gitignore b/services/dockercleaner/.gitignore
new file mode 120000 (symlink)
index 0000000..ed3b362
--- /dev/null
@@ -0,0 +1 @@
+../../sdk/python/.gitignore
\ No newline at end of file
diff --git a/services/dockercleaner/arvados_docker/__init__.py b/services/dockercleaner/arvados_docker/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/services/dockercleaner/arvados_docker/cleaner.py b/services/dockercleaner/arvados_docker/cleaner.py
new file mode 100755 (executable)
index 0000000..191cb55
--- /dev/null
@@ -0,0 +1,260 @@
+#!/usr/bin/env python3
+"""arvados_docker.cleaner - Remove unused Docker images from compute nodes
+
+Usage:
+  python3 -m arvados_docker.cleaner --quota 50G
+"""
+
+import argparse
+import collections
+import copy
+import functools
+import json
+import logging
+import sys
+import time
+
+import docker
+
+SUFFIX_SIZES = {suffix: 1024 ** exp for exp, suffix in enumerate('kmgt', 1)}
+
+logger = logging.getLogger('arvados_docker.cleaner')
+
+def return_when_docker_not_found(result=None):
+    # If the decorated function raises a 404 error from Docker, return
+    # `result` instead.
+    def docker_not_found_decorator(orig_func):
+        @functools.wraps(orig_func)
+        def docker_not_found_wrapper(*args, **kwargs):
+            try:
+                return orig_func(*args, **kwargs)
+            except docker.errors.APIError as error:
+                if error.response.status_code != 404:
+                    raise
+                return result
+        return docker_not_found_wrapper
+    return docker_not_found_decorator
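+
+# Usage sketch (mirroring load_container below): decorating a method with
+# @return_when_docker_not_found() makes a Docker 404 return the given
+# `result` (None by default) instead of raising.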
+
+class DockerImage:
+    def __init__(self, image_hash):
+        self.docker_id = image_hash['Id']
+        self.size = image_hash['VirtualSize']
+        self.last_used = -1
+
+    def used_at(self, use_time):
+        self.last_used = max(self.last_used, use_time)
+
+
+class DockerImages:
+    def __init__(self, target_size):
+        self.target_size = target_size
+        self.images = {}
+        self.container_image_map = {}
+
+    @classmethod
+    def from_daemon(cls, target_size, docker_client):
+        images = cls(target_size)
+        for image in docker_client.images():
+            images.add_image(image)
+        return images
+
+    def add_image(self, image_hash):
+        image = DockerImage(image_hash)
+        self.images[image.docker_id] = image
+        logger.debug("Registered image %s", image.docker_id)
+
+    def del_image(self, image_id):
+        if image_id in self.images:
+            del self.images[image_id]
+            self.container_image_map = {
+                cid: cid_image
+                for cid, cid_image in self.container_image_map.items()
+                if cid_image != image_id}
+            logger.debug("Unregistered image %s", image_id)
+
+    def has_image(self, image_id):
+        return image_id in self.images
+
+    def add_user(self, container_hash, use_time):
+        image_id = container_hash['Image']
+        if image_id in self.images:
+            self.container_image_map[container_hash['Id']] = image_id
+            self.images[image_id].used_at(use_time)
+            logger.debug("Registered container %s using image %s",
+                         container_hash['Id'], image_id)
+
+    def end_user(self, cid):
+        self.container_image_map.pop(cid, None)
+        logger.debug("Unregistered container %s", cid)
+
+    def should_delete(self):
+        if not self.images:
+            return
+        # Build a list of images, ordered by use time.
+        lru_images = list(self.images.values())
+        lru_images.sort(key=lambda image: image.last_used)
+        # Make sure we don't delete any images in use, or if there are
+        # none, the most recently used image.
+        if self.container_image_map:
+            keep_ids = set(self.container_image_map.values())
+        else:
+            keep_ids = {lru_images[-1].docker_id}
+        space_left = (self.target_size - sum(self.images[image_id].size
+                                             for image_id in keep_ids))
+        # Go through the list most recently used first, and note which
+        # images can be saved with the space allotted.
+        for image in reversed(lru_images):
+            if (image.docker_id not in keep_ids) and (image.size <= space_left):
+                keep_ids.add(image.docker_id)
+                space_left -= image.size
+        # Yield the Docker IDs of any image we don't want to save, least
+        # recently used first.
+        for image in lru_images:
+            if image.docker_id not in keep_ids:
+                yield image.docker_id
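+    # Worked example (cf. test_images_under_target_not_deletable): images
+    # of size 20, 30, 40, 10 used in that order with target_size=45.  The
+    # most recently used image (10) is always kept, leaving 35 of budget;
+    # 40 does not fit, 30 does (5 left), 20 does not.  should_delete()
+    # therefore yields the images of size 20 and 40, LRU first.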
+
+
+class DockerEventHandlers:
+    # This class maps Docker event types to the names of methods that should
+    # receive those events.
+    def __init__(self):
+        self.handler_names = collections.defaultdict(list)
+
+    def on(self, *status_names):
+        def register_handler(handler_method):
+            for status in status_names:
+                self.handler_names[status].append(handler_method.__name__)
+            return handler_method
+        return register_handler
+
+    def for_event(self, status):
+        return iter(self.handler_names[status])
+
+    def copy(self):
+        result = self.__class__()
+        result.handler_names = copy.deepcopy(self.handler_names)
+        return result
+
+
+class DockerEventListener:
+    # To use this class, define event_handlers as an instance of
+    # DockerEventHandlers.  Call run() to iterate over events and call the
+    # handler methods as they come in.
+    ENCODING = 'utf-8'
+
+    def __init__(self, events):
+        self.events = events
+
+    def run(self):
+        for event in self.events:
+            event = json.loads(event.decode(self.ENCODING))
+            for method_name in self.event_handlers.for_event(event['status']):
+                getattr(self, method_name)(event)
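+
+# A minimal sketch of how the two classes above combine; `Watcher` and
+# `note_create` are hypothetical names, but the concrete subclasses below
+# follow exactly this pattern:
+#
+#     class Watcher(DockerEventListener):
+#         event_handlers = DockerEventHandlers()
+#
+#         @event_handlers.on('create')
+#         def note_create(self, event):
+#             logger.debug("saw container %s", event['id'])
+#
+#     Watcher(docker_client.events()).run()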
+
+
+class DockerImageUseRecorder(DockerEventListener):
+    event_handlers = DockerEventHandlers()
+
+    def __init__(self, images, docker_client, events):
+        self.images = images
+        self.docker_client = docker_client
+        super().__init__(events)
+
+    @event_handlers.on('create')
+    @return_when_docker_not_found()
+    def load_container(self, event):
+        container_hash = self.docker_client.inspect_container(event['id'])
+        self.new_container(event, container_hash)
+
+    def new_container(self, event, container_hash):
+        self.images.add_user(container_hash, event['time'])
+
+    @event_handlers.on('destroy')
+    def container_stopped(self, event):
+        self.images.end_user(event['id'])
+
+
+class DockerImageCleaner(DockerImageUseRecorder):
+    event_handlers = DockerImageUseRecorder.event_handlers.copy()
+
+    def __init__(self, images, docker_client, events):
+        super().__init__(images, docker_client, events)
+        self.logged_unknown = set()
+
+    def new_container(self, event, container_hash):
+        container_image_id = container_hash['Image']
+        if not self.images.has_image(container_image_id):
+            image_hash = self.docker_client.inspect_image(container_image_id)
+            self.images.add_image(image_hash)
+        return super().new_container(event, container_hash)
+
+    @event_handlers.on('destroy')
+    def clean_images(self, event=None):
+        for image_id in self.images.should_delete():
+            try:
+                self.docker_client.remove_image(image_id)
+            except docker.errors.APIError as error:
+                logger.warning("Failed to remove image %s: %s", image_id, error)
+            else:
+                logger.info("Removed image %s", image_id)
+                self.images.del_image(image_id)
+
+    @event_handlers.on('destroy')
+    def log_unknown_images(self, event):
+        unknown_ids = {image['Id'] for image in self.docker_client.images()
+                       if not self.images.has_image(image['Id'])}
+        for image_id in (unknown_ids - self.logged_unknown):
+            logger.info("Image %s is loaded but unused, so it won't be cleaned",
+                        image_id)
+        self.logged_unknown = unknown_ids
+
+
+def human_size(size_str):
+    """Parse a human-readable size such as '50G' into a byte count."""
+    size_str = size_str.lower().rstrip('b')
+    multiplier = SUFFIX_SIZES.get(size_str[-1])
+    if multiplier is None:
+        multiplier = 1
+    else:
+        size_str = size_str[:-1]
+    return int(size_str) * multiplier
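+
+# For example, human_size('50G') == 50 * (1024 ** 3), human_size('3k') ==
+# 3072, and a bare byte count such as human_size('1024') == 1024.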
+
+def parse_arguments(arguments):
+    parser = argparse.ArgumentParser(
+        prog="arvados_docker.cleaner",
+        description="clean old Docker images from Arvados compute nodes")
+    parser.add_argument(
+        '--quota', action='store', type=human_size, required=True,
+        help="space allowance for Docker images, suffixed with K/M/G/T")
+    parser.add_argument(
+        '--verbose', '-v', action='count', default=0,
+        help="log more information")
+    return parser.parse_args(arguments)
+
+def setup_logging(args):
+    log_handler = logging.StreamHandler()
+    log_handler.setFormatter(logging.Formatter(
+            '%(asctime)s %(name)s[%(process)d] %(levelname)s: %(message)s',
+            '%Y-%m-%d %H:%M:%S'))
+    logger.addHandler(log_handler)
+    logger.setLevel(logging.ERROR - (10 * args.verbose))
+
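+# run() proceeds in two phases: it first replays historical Docker events
+# (since=1, until=start_time) so the use recorder can reconstruct which
+# images were in use before this process started, then switches to the
+# live event stream (since=start_time) and cleans images as containers
+# are destroyed.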
+def run(args, docker_client):
+    start_time = int(time.time())
+    logger.debug("Loading Docker activity through present")
+    images = DockerImages.from_daemon(args.quota, docker_client)
+    use_recorder = DockerImageUseRecorder(
+        images, docker_client, docker_client.events(since=1, until=start_time))
+    use_recorder.run()
+    cleaner = DockerImageCleaner(
+        images, docker_client, docker_client.events(since=start_time))
+    logger.info("Starting cleanup loop")
+    cleaner.clean_images()
+    cleaner.run()
+
+def main(arguments):
+    args = parse_arguments(arguments)
+    setup_logging(args)
+    run(args, docker.Client())
+
+if __name__ == '__main__':
+    main(sys.argv[1:])
diff --git a/services/dockercleaner/gittaggers.py b/services/dockercleaner/gittaggers.py
new file mode 120000 (symlink)
index 0000000..a9ad861
--- /dev/null
@@ -0,0 +1 @@
+../../sdk/python/gittaggers.py
\ No newline at end of file
diff --git a/services/dockercleaner/setup.py b/services/dockercleaner/setup.py
new file mode 100644 (file)
index 0000000..a799ffe
--- /dev/null
@@ -0,0 +1,33 @@
+#!/usr/bin/env python3
+
+import os
+import sys
+import setuptools.command.egg_info as egg_info_cmd
+
+from setuptools import setup, find_packages
+
+try:
+    import gittaggers
+    tagger = gittaggers.EggInfoFromGit
+except ImportError:
+    tagger = egg_info_cmd.egg_info
+
+setup(name="arvados-docker-cleaner",
+      version="0.1",
+      description="Arvados Docker cleaner",
+      author="Arvados",
+      author_email="info@arvados.org",
+      url="https://arvados.org",
+      download_url="https://github.com/curoverse/arvados.git",
+      license="GNU Affero General Public License version 3.0",
+      packages=find_packages(),
+      install_requires=[
+        'docker-py',
+        ],
+      tests_require=[
+        'mock',
+        ],
+      test_suite='tests',
+      zip_safe=False,
+      cmdclass={'egg_info': tagger},
+      )
diff --git a/services/dockercleaner/tests/__init__.py b/services/dockercleaner/tests/__init__.py
new file mode 100644 (file)
index 0000000..ab92cab
--- /dev/null
@@ -0,0 +1,4 @@
+#!/usr/bin/env python3
+
+import logging
+logging.getLogger('').setLevel(logging.CRITICAL)
diff --git a/services/dockercleaner/tests/test_cleaner.py b/services/dockercleaner/tests/test_cleaner.py
new file mode 100644 (file)
index 0000000..fd959de
--- /dev/null
@@ -0,0 +1,356 @@
+#!/usr/bin/env python3
+
+import collections
+import itertools
+import json
+import random
+import time
+import unittest
+
+import docker
+import mock
+
+from arvados_docker import cleaner
+
+MAX_DOCKER_ID = (16 ** 64) - 1
+
+def MockDockerId():
+    return '{:064x}'.format(random.randint(0, MAX_DOCKER_ID))
+
+def MockContainer(image_hash):
+    return {'Id': MockDockerId(),
+            'Image': image_hash['Id']}
+
+def MockImage(*, size=0, vsize=None, tags=()):
+    if vsize is None:
+        vsize = random.randint(100, 2000000)
+    return {'Id': MockDockerId(),
+            'ParentId': MockDockerId(),
+            'RepoTags': list(tags),
+            'Size': size,
+            'VirtualSize': vsize}
+
+class MockEvent(dict):
+    ENCODING = 'utf-8'
+    event_seq = itertools.count(1)
+
+    def __init__(self, status, docker_id=None, **event_data):
+        if docker_id is None:
+            docker_id = MockDockerId()
+        super().__init__(**event_data)
+        self['status'] = status
+        self['id'] = docker_id
+        self.setdefault('time', next(self.event_seq))
+
+    def encoded(self):
+        return json.dumps(self).encode(self.ENCODING)
+
+
+class MockException(docker.errors.APIError):
+    def __init__(self, status_code):
+        response = mock.Mock(name='response')
+        response.status_code = status_code
+        super().__init__("mock exception", response)
+
+
+class DockerImageTestCase(unittest.TestCase):
+    def test_used_at_sets_last_used(self):
+        image = cleaner.DockerImage(MockImage())
+        image.used_at(5)
+        self.assertEqual(5, image.last_used)
+
+    def test_used_at_moves_forward(self):
+        image = cleaner.DockerImage(MockImage())
+        image.used_at(6)
+        image.used_at(8)
+        self.assertEqual(8, image.last_used)
+
+    def test_used_at_does_not_go_backward(self):
+        image = cleaner.DockerImage(MockImage())
+        image.used_at(4)
+        image.used_at(2)
+        self.assertEqual(4, image.last_used)
+
+
+class DockerImagesTestCase(unittest.TestCase):
+    def setUp(self):
+        self.mock_images = []
+
+    def setup_mock_images(self, *vsizes):
+        self.mock_images.extend(MockImage(vsize=vsize) for vsize in vsizes)
+
+    def setup_images(self, *vsizes, target_size=1000000):
+        self.setup_mock_images(*vsizes)
+        images = cleaner.DockerImages(target_size)
+        for image in self.mock_images:
+            images.add_image(image)
+        return images
+
+    def test_has_image(self):
+        images = self.setup_images(None)
+        self.assertTrue(images.has_image(self.mock_images[0]['Id']))
+        self.assertFalse(images.has_image(MockDockerId()))
+
+    def test_del_image(self):
+        images = self.setup_images(None)
+        images.del_image(self.mock_images[0]['Id'])
+        self.assertFalse(images.has_image(self.mock_images[0]['Id']))
+
+    def test_del_nonexistent_image(self):
+        images = self.setup_images(None)
+        images.del_image(MockDockerId())
+        self.assertTrue(images.has_image(self.mock_images[0]['Id']))
+
+    def test_one_image_always_kept(self):
+        # When crunch-job starts a job, it makes sure each compute node
+        # has the Docker image loaded, then it runs all the tasks with
+        # the assumption that the image is on each node.  As long as that's
+        # true, the cleaner should avoid removing every installed image:
+        # crunch-job might be counting on the most recent one to be
+        # available, even if it's not currently in use.
+        images = self.setup_images(None, None, target_size=1)
+        for use_time, image in enumerate(self.mock_images, 1):
+            user = MockContainer(image)
+            images.add_user(user, use_time)
+            images.end_user(user['Id'])
+        self.assertEqual([self.mock_images[0]['Id']],
+                         list(images.should_delete()))
+
+    def test_images_under_target_not_deletable(self):
+        # The images are used in this order.  target_size is large enough
+        # to hold the largest image by itself, but not once the most
+        # recently used image is kept; we then fall back to a smaller one.
+        images = self.setup_images(20, 30, 40, 10, target_size=45)
+        for use_time, image in enumerate(self.mock_images, 1):
+            user = MockContainer(image)
+            images.add_user(user, use_time)
+            images.end_user(user['Id'])
+        self.assertEqual([self.mock_images[ii]['Id'] for ii in [0, 2]],
+                         list(images.should_delete()))
+
+    def test_images_in_use_not_deletable(self):
+        images = self.setup_images(None, None, target_size=1)
+        users = [MockContainer(image) for image in self.mock_images]
+        images.add_user(users[0], 1)
+        images.add_user(users[1], 2)
+        images.end_user(users[1]['Id'])
+        self.assertEqual([self.mock_images[1]['Id']],
+                         list(images.should_delete()))
+
+    def test_image_deletable_after_unused(self):
+        images = self.setup_images(None, None, target_size=1)
+        users = [MockContainer(image) for image in self.mock_images]
+        images.add_user(users[0], 1)
+        images.add_user(users[1], 2)
+        images.end_user(users[0]['Id'])
+        self.assertEqual([self.mock_images[0]['Id']],
+                         list(images.should_delete()))
+
+    def test_image_not_deletable_if_user_restarts(self):
+        images = self.setup_images(None, target_size=1)
+        user = MockContainer(self.mock_images[-1])
+        images.add_user(user, 1)
+        images.end_user(user['Id'])
+        images.add_user(user, 2)
+        self.assertEqual([], list(images.should_delete()))
+
+    def test_image_not_deletable_if_any_user_remains(self):
+        images = self.setup_images(None, target_size=1)
+        users = [MockContainer(self.mock_images[0]) for ii in range(2)]
+        images.add_user(users[0], 1)
+        images.add_user(users[1], 2)
+        images.end_user(users[0]['Id'])
+        self.assertEqual([], list(images.should_delete()))
+
+    def test_image_deletable_after_all_users_end(self):
+        images = self.setup_images(None, None, target_size=1)
+        users = [MockContainer(self.mock_images[ii]) for ii in [0, 1, 1]]
+        images.add_user(users[0], 1)
+        images.add_user(users[1], 2)
+        images.add_user(users[2], 3)
+        images.end_user(users[1]['Id'])
+        images.end_user(users[2]['Id'])
+        self.assertEqual([self.mock_images[-1]['Id']],
+                         list(images.should_delete()))
+
+    def test_images_suggested_for_deletion_by_lru(self):
+        images = self.setup_images(10, 10, 10, target_size=1)
+        users = [MockContainer(image) for image in self.mock_images]
+        images.add_user(users[0], 3)
+        images.add_user(users[1], 1)
+        images.add_user(users[2], 2)
+        for user in users:
+            images.end_user(user['Id'])
+        self.assertEqual([self.mock_images[ii]['Id'] for ii in [1, 2]],
+                         list(images.should_delete()))
+
+    def test_adding_user_without_image_does_not_implicitly_add_image(self):
+        images = self.setup_images(10)
+        images.add_user(MockContainer(MockImage()), 1)
+        self.assertEqual([], list(images.should_delete()))
+
+    def test_nonexistent_user_removed(self):
+        images = self.setup_images()
+        images.end_user('nonexistent')
+        # No exception should be raised.
+
+    def test_del_image_effective_with_users_present(self):
+        images = self.setup_images(None, target_size=1)
+        user = MockContainer(self.mock_images[0])
+        images.add_user(user, 1)
+        images.del_image(self.mock_images[0]['Id'])
+        images.end_user(user['Id'])
+        self.assertEqual([], list(images.should_delete()))
+
+    def setup_from_daemon(self, *vsizes, target_size=1500000):
+        self.setup_mock_images(*vsizes)
+        docker_client = mock.MagicMock(name='docker_client')
+        docker_client.images.return_value = iter(self.mock_images)
+        return cleaner.DockerImages.from_daemon(target_size, docker_client)
+
+    def test_images_loaded_from_daemon(self):
+        images = self.setup_from_daemon(None, None)
+        for image in self.mock_images:
+            self.assertTrue(images.has_image(image['Id']))
+
+    def test_target_size_set_from_daemon(self):
+        images = self.setup_from_daemon(20, 10, 5, target_size=15)
+        user = MockContainer(self.mock_images[-1])
+        images.add_user(user, 1)
+        self.assertEqual([self.mock_images[0]['Id']],
+                         list(images.should_delete()))
+
+
+class DockerImageUseRecorderTestCase(unittest.TestCase):
+    TEST_CLASS = cleaner.DockerImageUseRecorder
+
+    def setUp(self):
+        self.images = mock.MagicMock(name='images')
+        self.docker_client = mock.MagicMock(name='docker_client')
+        self.events = []
+        self.recorder = self.TEST_CLASS(self.images, self.docker_client,
+                                        self.encoded_events)
+
+    @property
+    def encoded_events(self):
+        return (event.encoded() for event in self.events)
+
+    def test_unknown_events_ignored(self):
+        self.events.append(MockEvent('mock!event'))
+        self.recorder.run()
+        # No exception should be raised.
+
+    def test_fetches_container_on_create(self):
+        self.events.append(MockEvent('create'))
+        self.recorder.run()
+        self.docker_client.inspect_container.assert_called_with(
+            self.events[0]['id'])
+
+    def test_adds_user_on_container_create(self):
+        self.events.append(MockEvent('create'))
+        self.recorder.run()
+        self.images.add_user.assert_called_with(
+            self.docker_client.inspect_container(), self.events[0]['time'])
+
+    def test_unknown_image_handling(self):
+        # The use recorder should not fetch any images.
+        self.events.append(MockEvent('create'))
+        self.recorder.run()
+        self.assertFalse(self.docker_client.inspect_image.called)
+
+    def test_unfetchable_containers_ignored(self):
+        self.events.append(MockEvent('create'))
+        self.docker_client.inspect_container.side_effect = MockException(404)
+        self.recorder.run()
+        self.assertFalse(self.images.add_user.called)
+
+    def test_ends_user_on_container_destroy(self):
+        self.events.append(MockEvent('destroy'))
+        self.recorder.run()
+        self.images.end_user.assert_called_with(self.events[0]['id'])
+
+
+class DockerImageCleanerTestCase(DockerImageUseRecorderTestCase):
+    TEST_CLASS = cleaner.DockerImageCleaner
+
+    def test_unknown_image_handling(self):
+        # The image cleaner should fetch and record new images.
+        self.images.has_image.return_value = False
+        self.events.append(MockEvent('create'))
+        self.recorder.run()
+        self.docker_client.inspect_image.assert_called_with(
+            self.docker_client.inspect_container()['Image'])
+        self.images.add_image.assert_called_with(
+            self.docker_client.inspect_image())
+
+    def test_unfetchable_images_ignored(self):
+        self.images.has_image.return_value = False
+        self.docker_client.inspect_image.side_effect = MockException(404)
+        self.events.append(MockEvent('create'))
+        self.recorder.run()
+        self.docker_client.inspect_image.assert_called_with(
+            self.docker_client.inspect_container()['Image'])
+        self.assertFalse(self.images.add_image.called)
+
+    def test_deletions_after_destroy(self):
+        delete_id = MockDockerId()
+        self.images.should_delete.return_value = [delete_id]
+        self.events.append(MockEvent('destroy'))
+        self.recorder.run()
+        self.docker_client.remove_image.assert_called_with(delete_id)
+        self.images.del_image.assert_called_with(delete_id)
+
+    def test_failed_deletion_handling(self):
+        delete_id = MockDockerId()
+        self.images.should_delete.return_value = [delete_id]
+        self.docker_client.remove_image.side_effect = MockException(500)
+        self.events.append(MockEvent('destroy'))
+        self.recorder.run()
+        self.docker_client.remove_image.assert_called_with(delete_id)
+        self.assertFalse(self.images.del_image.called)
+
+
+class HumanSizeTestCase(unittest.TestCase):
+    def check(self, human_str, count, exp):
+        self.assertEqual(count * (1024 ** exp),
+                         cleaner.human_size(human_str))
+
+    def test_bytes(self):
+        self.check('1', 1, 0)
+        self.check('82', 82, 0)
+
+    def test_kibibytes(self):
+        self.check('2K', 2, 1)
+        self.check('3k', 3, 1)
+
+    def test_mebibytes(self):
+        self.check('4M', 4, 2)
+        self.check('5m', 5, 2)
+
+    def test_gibibytes(self):
+        self.check('6G', 6, 3)
+        self.check('7g', 7, 3)
+
+    def test_tebibytes(self):
+        self.check('8T', 8, 4)
+        self.check('9t', 9, 4)
+
+
+class RunTestCase(unittest.TestCase):
+    def setUp(self):
+        self.args = mock.MagicMock(name='args')
+        self.args.quota = 1000000
+        self.docker_client = mock.MagicMock(name='docker_client')
+
+    def test_run(self):
+        test_start_time = int(time.time())
+        self.docker_client.events.return_value = []
+        cleaner.run(self.args, self.docker_client)
+        self.assertEqual(2, self.docker_client.events.call_count)
+        event_kwargs = [args[1] for args in
+                        self.docker_client.events.call_args_list]
+        self.assertIn('since', event_kwargs[0])
+        self.assertIn('until', event_kwargs[0])
+        self.assertLessEqual(test_start_time, event_kwargs[0]['until'])
+        self.assertIn('since', event_kwargs[1])
+        self.assertEqual(event_kwargs[0]['until'], event_kwargs[1]['since'])
index d9a9a0789f8da010c8f199804ad14f75fcc7e240..f0b2677d86bd8626941b571e2bb20374c2613fa4 100644 (file)
@@ -55,6 +55,10 @@ on your system.
 Testing and Development
 -----------------------
 
+To build llfuse on a Debian-based system, install these packages:
+
+$ apt-get install python-dev pkg-config libfuse-dev libattr1-dev
+
 This package is one part of the Arvados source package, and it has
 integration tests to check interoperability with other Arvados
 components.  Our `hacking guide
index 2d4f6c9dcde55eac714f3b2a0171fa3c836b59b6..49151318a751941742295ad427816414cfe4ad43 100644 (file)
@@ -1,6 +1,49 @@
-#
-# FUSE driver for Arvados Keep
-#
+"""FUSE driver for Arvados Keep
+
+Architecture:
+
+There is one `Operations` object per mount point.  It is the entry point for all
+read and write requests from the llfuse module.
+
+The operations object owns an `Inodes` object.  The inodes object stores the
+mapping from numeric inode (used throughout the file system API to uniquely
+identify files) to the Python objects that implement files and directories.
+
+The `Inodes` object owns an `InodeCache` object.  The inode cache records the
+memory footprint of file system objects and when they are last used.  When the
+cache limit is exceeded, the least recently used objects are cleared.
+
+File system objects inherit from `fresh.FreshBase`, which manages the object lifecycle.
+
+File objects inherit from `fusefile.File`.  Key methods are `readfrom` and
+`writeto`, which implement actual reads and writes.
+
+Directory objects inherit from `fusedir.Directory`.  The directory object wraps
+a Python dict which stores the mapping from filenames to directory entries.
+Directory contents can be accessed through Python operators such as `[]`
+and `in`.  These methods automatically check whether the directory is fresh
+(up to date) or stale (needs update) and will call `update` if necessary
+before returning a result.
+
+The general FUSE operation flow is as follows:
+
+- The request handler is called with either an inode or file handle that is the
+  subject of the operation.
+
+- Look up the inode using the Inodes table or the file handle in the
+  filehandles table to get the file system object.
+
+- For methods that alter files or directories, check that the operation is
+  valid and permitted using _check_writable().
+
+- Call the relevant method on the file system object.
+
+- Return the result.
+
+The FUSE driver supports the Arvados event bus.  When an event is received for
+an object that is live in the inode cache, that object is immediately updated.
+
+"""
 
 import os
 import sys
@@ -21,663 +64,151 @@ import calendar
 import threading
 import itertools
 import ciso8601
+import collections
+import functools
 
-from arvados.util import portable_data_hash_pattern, uuid_pattern, collection_uuid_pattern, group_uuid_pattern, user_uuid_pattern, link_uuid_pattern
-
-_logger = logging.getLogger('arvados.arvados_fuse')
-
-# Match any character which FUSE or Linux cannot accommodate as part
-# of a filename. (If present in a collection filename, they will
-# appear as underscores in the fuse mount.)
-_disallowed_filename_characters = re.compile('[\x00/]')
-
-def convertTime(t):
-    """Parse Arvados timestamp to unix time."""
-    if not t:
-        return 0
-    try:
-        return calendar.timegm(ciso8601.parse_datetime_unaware(t).timetuple())
-    except (TypeError, ValueError):
-        return 0
-
-def sanitize_filename(dirty):
-    '''Replace disallowed filename characters with harmless "_".'''
-    if dirty is None:
-        return None
-    elif dirty == '':
-        return '_'
-    elif dirty == '.':
-        return '_'
-    elif dirty == '..':
-        return '__'
-    else:
-        return _disallowed_filename_characters.sub('_', dirty)
-
+import Queue
 
-class FreshBase(object):
-    '''Base class for maintaining fresh/stale state to determine when to update.'''
-    def __init__(self):
-        self._stale = True
-        self._poll = False
-        self._last_update = time.time()
-        self._atime = time.time()
-        self._poll_time = 60
-
-    # Mark the value as stale
-    def invalidate(self):
-        self._stale = True
-
-    # Test if the entries dict is stale.
-    def stale(self):
-        if self._stale:
-            return True
-        if self._poll:
-            return (self._last_update + self._poll_time) < self._atime
-        return False
-
-    def fresh(self):
-        self._stale = False
-        self._last_update = time.time()
-
-    def atime(self):
-        return self._atime
-
-class File(FreshBase):
-    '''Base for file objects.'''
-
-    def __init__(self, parent_inode, _mtime=0):
-        super(File, self).__init__()
-        self.inode = None
-        self.parent_inode = parent_inode
-        self._mtime = _mtime
-
-    def size(self):
-        return 0
-
-    def readfrom(self, off, size):
-        return ''
-
-    def mtime(self):
-        return self._mtime
-
-
-class StreamReaderFile(File):
-    '''Wraps a StreamFileReader as a file.'''
-
-    def __init__(self, parent_inode, reader, _mtime):
-        super(StreamReaderFile, self).__init__(parent_inode, _mtime)
-        self.reader = reader
-
-    def size(self):
-        return self.reader.size()
-
-    def readfrom(self, off, size):
-        return self.reader.readfrom(off, size)
-
-    def stale(self):
-        return False
-
-
-class StringFile(File):
-    '''Wrap a simple string as a file'''
-    def __init__(self, parent_inode, contents, _mtime):
-        super(StringFile, self).__init__(parent_inode, _mtime)
-        self.contents = contents
-
-    def size(self):
-        return len(self.contents)
-
-    def readfrom(self, off, size):
-        return self.contents[off:(off+size)]
-
-
-class ObjectFile(StringFile):
-    '''Wrap a dict as a serialized json object.'''
-
-    def __init__(self, parent_inode, obj):
-        super(ObjectFile, self).__init__(parent_inode, "", 0)
-        self.uuid = obj['uuid']
-        self.update(obj)
+# The default _notify_queue has a limit of 1000 items, but it really needs
+# to be unlimited to avoid deadlocks; see
+# https://arvados.org/issues/3198#note-43 for details.
 
-    def update(self, obj):
-        self._mtime = convertTime(obj['modified_at']) if 'modified_at' in obj else 0
-        self.contents = json.dumps(obj, indent=4, sort_keys=True) + "\n"
-
-
-class Directory(FreshBase):
-    '''Generic directory object, backed by a dict.
-    Consists of a set of entries with the key representing the filename
-    and the value referencing a File or Directory object.
-    '''
-
-    def __init__(self, parent_inode):
-        super(Directory, self).__init__()
-
-        '''parent_inode is the integer inode number'''
-        self.inode = None
-        if not isinstance(parent_inode, int):
-            raise Exception("parent_inode should be an int")
-        self.parent_inode = parent_inode
-        self._entries = {}
-        self._mtime = time.time()
-
-    #  Overriden by subclasses to implement logic to update the entries dict
-    #  when the directory is stale
-    def update(self):
-        pass
-
-    # Only used when computing the size of the disk footprint of the directory
-    # (stub)
-    def size(self):
-        return 0
-
-    def checkupdate(self):
-        if self.stale():
-            try:
-                self.update()
-            except apiclient.errors.HttpError as e:
-                _logger.debug(e)
-
-    def __getitem__(self, item):
-        self.checkupdate()
-        return self._entries[item]
-
-    def items(self):
-        self.checkupdate()
-        return self._entries.items()
-
-    def __iter__(self):
-        self.checkupdate()
-        return self._entries.iterkeys()
-
-    def __contains__(self, k):
-        self.checkupdate()
-        return k in self._entries
+llfuse.capi._notify_queue = Queue.Queue()
 
-    def merge(self, items, fn, same, new_entry):
-        '''Helper method for updating the contents of the directory.  Takes a list
-        describing the new contents of the directory, reuse entries that are
-        the same in both the old and new lists, create new entries, and delete
-        old entries missing from the new list.
+from fusedir import sanitize_filename, Directory, CollectionDirectory, MagicDirectory, TagsDirectory, ProjectDirectory, SharedDirectory, CollectionDirectoryBase
+from fusefile import StringFile, FuseArvadosFile
 
-        items: iterable with new directory contents
+_logger = logging.getLogger('arvados.arvados_fuse')
 
-        fn: function to take an entry in 'items' and return the desired file or
-        directory name, or None if this entry should be skipped
+# Uncomment this to enable llfuse debug logging.
+# log_handler = logging.StreamHandler()
+# llogger = logging.getLogger('llfuse')
+# llogger.addHandler(log_handler)
+# llogger.setLevel(logging.DEBUG)
 
-        same: function to compare an existing entry (a File or Directory
-        object) with an entry in the items list to determine whether to keep
-        the existing entry.
+class Handle(object):
+    """Connects a numeric file handle to a File or Directory object that has
+    been opened by the client."""
 
-        new_entry: function to create a new directory entry (File or Directory
-        object) from an entry in the items list.
+    def __init__(self, fh, obj):
+        self.fh = fh
+        self.obj = obj
+        self.obj.inc_use()
 
-        '''
+    def release(self):
+        self.obj.dec_use()
 
-        oldentries = self._entries
-        self._entries = {}
-        changed = False
-        for i in items:
-            name = sanitize_filename(fn(i))
-            if name:
-                if name in oldentries and same(oldentries[name], i):
-                    # move existing directory entry over
-                    self._entries[name] = oldentries[name]
-                    del oldentries[name]
-                else:
-                    # create new directory entry
-                    ent = new_entry(i)
-                    if ent is not None:
-                        self._entries[name] = self.inodes.add_entry(ent)
-                        changed = True
+    def flush(self):
+        if self.obj.writable():
+            return self.obj.flush()
 
-        # delete any other directory entries that were not in found in 'items'
-        for i in oldentries:
-            llfuse.invalidate_entry(self.inode, str(i))
-            self.inodes.del_entry(oldentries[i])
-            changed = True
 
-        if changed:
-            self._mtime = time.time()
+class FileHandle(Handle):
+    """Connects a numeric file handle to a File object that has
+    been opened by the client."""
+    pass
 
-        self.fresh()
 
-    def clear(self):
-        '''Delete all entries'''
-        oldentries = self._entries
-        self._entries = {}
-        for n in oldentries:
-            if isinstance(n, Directory):
-                n.clear()
-            llfuse.invalidate_entry(self.inode, str(n))
-            self.inodes.del_entry(oldentries[n])
-        llfuse.invalidate_inode(self.inode)
-        self.invalidate()
+class DirectoryHandle(Handle):
+    """Connects a numeric file handle to a Directory object that has
+    been opened by the client."""
 
-    def mtime(self):
-        return self._mtime
+    def __init__(self, fh, dirobj, entries):
+        super(DirectoryHandle, self).__init__(fh, dirobj)
+        self.entries = entries
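+
+# Illustrative sketch (not part of this commit): opendir() snapshots the
+# directory's entry list into a DirectoryHandle, so a concurrent update to
+# the directory cannot change what an in-progress readdir() iterates over;
+# 'dirobj' and 'parent' stand in for real Directory objects.
+#
+#   handle = DirectoryHandle(7, dirobj,
+#                            [('.', dirobj), ('..', parent)] + list(dirobj.items()))
+#   handle.entries[0]   # -> ('.', dirobj), stable while the handle is open
+#   handle.release()    # drop the use count taken by Handle.__init__()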
 
 
-class CollectionDirectory(Directory):
-    '''Represents the root of a directory tree holding a collection.'''
+class InodeCache(object):
+    """Records the memory footprint of objects and when they are last used.
 
-    def __init__(self, parent_inode, inodes, api, num_retries, collection):
-        super(CollectionDirectory, self).__init__(parent_inode)
-        self.inodes = inodes
-        self.api = api
-        self.num_retries = num_retries
-        self.collection_object_file = None
-        self.collection_object = None
-        if isinstance(collection, dict):
-            self.collection_locator = collection['uuid']
-            self._mtime = convertTime(collection.get('modified_at'))
-        else:
-            self.collection_locator = collection
-            self._mtime = 0
-
-    def same(self, i):
-        return i['uuid'] == self.collection_locator or i['portable_data_hash'] == self.collection_locator
-
-    # Used by arv-web.py to switch the contents of the CollectionDirectory
-    def change_collection(self, new_locator):
-        """Switch the contents of the CollectionDirectory.  Must be called with llfuse.lock held."""
-        self.collection_locator = new_locator
-        self.collection_object = None
-        self.update()
-
-    def new_collection(self, new_collection_object, coll_reader):
-        self.collection_object = new_collection_object
-
-        self._mtime = convertTime(self.collection_object.get('modified_at'))
-
-        if self.collection_object_file is not None:
-            self.collection_object_file.update(self.collection_object)
-
-        self.clear()
-        for s in coll_reader.all_streams():
-            cwd = self
-            for part in s.name().split('/'):
-                if part != '' and part != '.':
-                    partname = sanitize_filename(part)
-                    if partname not in cwd._entries:
-                        cwd._entries[partname] = self.inodes.add_entry(Directory(cwd.inode))
-                    cwd = cwd._entries[partname]
-            for k, v in s.files().items():
-                cwd._entries[sanitize_filename(k)] = self.inodes.add_entry(StreamReaderFile(cwd.inode, v, self.mtime()))
-
-    def update(self):
-        try:
-            if self.collection_object is not None and portable_data_hash_pattern.match(self.collection_locator):
-                return True
+    When the cache limit is exceeded, the least recently used objects are
+    cleared.  Clearing the object means discarding its contents to release
+    memory.  The next time the object is accessed, it must be re-fetched from
+    the server.  Note that the inode cache limit is a soft limit; the cache
+    limit may be exceeded if necessary to load very large objects, it may also
+    be exceeded if open file handles prevent objects from being cleared.
 
-            if self.collection_locator is None:
-                self.fresh()
-                return True
+    """
 
-            with llfuse.lock_released:
-                coll_reader = arvados.CollectionReader(
-                    self.collection_locator, self.api, self.api.keep,
-                    num_retries=self.num_retries)
-                new_collection_object = coll_reader.api_response() or {}
-                # If the Collection only exists in Keep, there will be no API
-                # response.  Fill in the fields we need.
-                if 'uuid' not in new_collection_object:
-                    new_collection_object['uuid'] = self.collection_locator
-                if "portable_data_hash" not in new_collection_object:
-                    new_collection_object["portable_data_hash"] = new_collection_object["uuid"]
-                if 'manifest_text' not in new_collection_object:
-                    new_collection_object['manifest_text'] = coll_reader.manifest_text()
-                coll_reader.normalize()
-            # end with llfuse.lock_released, re-acquire lock
-
-            if self.collection_object is None or self.collection_object["portable_data_hash"] != new_collection_object["portable_data_hash"]:
-                self.new_collection(new_collection_object, coll_reader)
-
-            self.fresh()
-            return True
-        except arvados.errors.NotFoundError:
-            _logger.exception("arv-mount %s: error", self.collection_locator)
-        except arvados.errors.ArgumentError as detail:
-            _logger.warning("arv-mount %s: error %s", self.collection_locator, detail)
-            if self.collection_object is not None and "manifest_text" in self.collection_object:
-                _logger.warning("arv-mount manifest_text is: %s", self.collection_object["manifest_text"])
-        except Exception:
-            _logger.exception("arv-mount %s: error", self.collection_locator)
-            if self.collection_object is not None and "manifest_text" in self.collection_object:
-                _logger.error("arv-mount manifest_text is: %s", self.collection_object["manifest_text"])
-        return False
+    def __init__(self, cap, min_entries=4):
+        self._entries = collections.OrderedDict()
+        self._by_uuid = {}
+        self._counter = itertools.count(0)
+        self.cap = cap
+        self._total = 0
+        self.min_entries = min_entries
 
-    def __getitem__(self, item):
-        self.checkupdate()
-        if item == '.arvados#collection':
-            if self.collection_object_file is None:
-                self.collection_object_file = ObjectFile(self.inode, self.collection_object)
-                self.inodes.add_entry(self.collection_object_file)
-            return self.collection_object_file
-        else:
-            return super(CollectionDirectory, self).__getitem__(item)
-
-    def __contains__(self, k):
-        if k == '.arvados#collection':
-            return True
-        else:
-            return super(CollectionDirectory, self).__contains__(k)
-
-
-class MagicDirectory(Directory):
-    '''A special directory that logically contains the set of all extant keep
-    locators.  When a file is referenced by lookup(), it is tested to see if it
-    is a valid keep locator to a manifest, and if so, loads the manifest
-    contents as a subdirectory of this directory with the locator as the
-    directory name.  Since querying a list of all extant keep locators is
-    impractical, only collections that have already been accessed are visible
-    to readdir().
-    '''
-
-    README_TEXT = '''
-This directory provides access to Arvados collections as subdirectories listed
-by uuid (in the form 'zzzzz-4zz18-1234567890abcde') or portable data hash (in
-the form '1234567890abcdefghijklmnopqrstuv+123').
-
-Note that this directory will appear empty until you attempt to access a
-specific collection subdirectory (such as trying to 'cd' into it), at which
-point the collection will actually be looked up on the server and the directory
-will appear if it exists.
-'''.lstrip()
-
-    def __init__(self, parent_inode, inodes, api, num_retries):
-        super(MagicDirectory, self).__init__(parent_inode)
-        self.inodes = inodes
-        self.api = api
-        self.num_retries = num_retries
+    def total(self):
+        return self._total
 
-    def __setattr__(self, name, value):
-        super(MagicDirectory, self).__setattr__(name, value)
-        # When we're assigned an inode, add a README.
-        if ((name == 'inode') and (self.inode is not None) and
-              (not self._entries)):
-            self._entries['README'] = self.inodes.add_entry(
-                StringFile(self.inode, self.README_TEXT, time.time()))
-            # If we're the root directory, add an identical by_id subdirectory.
-            if self.inode == llfuse.ROOT_INODE:
-                self._entries['by_id'] = self.inodes.add_entry(MagicDirectory(
-                        self.inode, self.inodes, self.api, self.num_retries))
-
-    def __contains__(self, k):
-        if k in self._entries:
-            return True
-
-        if not portable_data_hash_pattern.match(k) and not uuid_pattern.match(k):
+    def _remove(self, obj, clear):
+        if clear and not obj.clear():
+            _logger.debug("InodeCache could not clear %i in_use %s", obj.inode, obj.in_use())
             return False
+        self._total -= obj.cache_size
+        del self._entries[obj.cache_priority]
+        if obj.cache_uuid:
+            self._by_uuid[obj.cache_uuid].remove(obj)
+            if not self._by_uuid[obj.cache_uuid]:
+                del self._by_uuid[obj.cache_uuid]
+            obj.cache_uuid = None
+        if clear:
+            _logger.debug("InodeCache cleared %i total now %i", obj.inode, self._total)
+        return True
 
-        try:
-            e = self.inodes.add_entry(CollectionDirectory(
-                    self.inode, self.inodes, self.api, self.num_retries, k))
-            if e.update():
-                self._entries[k] = e
-                return True
-            else:
-                return False
-        except Exception as e:
-            _logger.debug('arv-mount exception keep %s', e)
-            return False
-
-    def __getitem__(self, item):
-        if item in self:
-            return self._entries[item]
-        else:
-            raise KeyError("No collection with id " + item)
-
-
-class RecursiveInvalidateDirectory(Directory):
-    def invalidate(self):
-        if self.inode == llfuse.ROOT_INODE:
-            llfuse.lock.acquire()
-        try:
-            super(RecursiveInvalidateDirectory, self).invalidate()
-            for a in self._entries:
-                self._entries[a].invalidate()
-        except Exception:
-            _logger.exception()
-        finally:
-            if self.inode == llfuse.ROOT_INODE:
-                llfuse.lock.release()
-
-
-class TagsDirectory(RecursiveInvalidateDirectory):
-    '''A special directory that contains as subdirectories all tags visible to the user.'''
-
-    def __init__(self, parent_inode, inodes, api, num_retries, poll_time=60):
-        super(TagsDirectory, self).__init__(parent_inode)
-        self.inodes = inodes
-        self.api = api
-        self.num_retries = num_retries
-        self._poll = True
-        self._poll_time = poll_time
-
-    def update(self):
-        with llfuse.lock_released:
-            tags = self.api.links().list(
-                filters=[['link_class', '=', 'tag']],
-                select=['name'], distinct=True
-                ).execute(num_retries=self.num_retries)
-        if "items" in tags:
-            self.merge(tags['items'],
-                       lambda i: i['name'],
-                       lambda a, i: a.tag == i['name'],
-                       lambda i: TagDirectory(self.inode, self.inodes, self.api, self.num_retries, i['name'], poll=self._poll, poll_time=self._poll_time))
-
-
-class TagDirectory(Directory):
-    '''A special directory that contains as subdirectories all collections visible
-    to the user that are tagged with a particular tag.
-    '''
-
-    def __init__(self, parent_inode, inodes, api, num_retries, tag,
-                 poll=False, poll_time=60):
-        super(TagDirectory, self).__init__(parent_inode)
-        self.inodes = inodes
-        self.api = api
-        self.num_retries = num_retries
-        self.tag = tag
-        self._poll = poll
-        self._poll_time = poll_time
-
-    def update(self):
-        with llfuse.lock_released:
-            taggedcollections = self.api.links().list(
-                filters=[['link_class', '=', 'tag'],
-                         ['name', '=', self.tag],
-                         ['head_uuid', 'is_a', 'arvados#collection']],
-                select=['head_uuid']
-                ).execute(num_retries=self.num_retries)
-        self.merge(taggedcollections['items'],
-                   lambda i: i['head_uuid'],
-                   lambda a, i: a.collection_locator == i['head_uuid'],
-                   lambda i: CollectionDirectory(self.inode, self.inodes, self.api, self.num_retries, i['head_uuid']))
-
-
-class ProjectDirectory(Directory):
-    '''A special directory that contains the contents of a project.'''
-
-    def __init__(self, parent_inode, inodes, api, num_retries, project_object,
-                 poll=False, poll_time=60):
-        super(ProjectDirectory, self).__init__(parent_inode)
-        self.inodes = inodes
-        self.api = api
-        self.num_retries = num_retries
-        self.project_object = project_object
-        self.project_object_file = None
-        self.uuid = project_object['uuid']
-        self._poll = poll
-        self._poll_time = poll_time
-
-    def createDirectory(self, i):
-        if collection_uuid_pattern.match(i['uuid']):
-            return CollectionDirectory(self.inode, self.inodes, self.api, self.num_retries, i)
-        elif group_uuid_pattern.match(i['uuid']):
-            return ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i, self._poll, self._poll_time)
-        elif link_uuid_pattern.match(i['uuid']):
-            if i['head_kind'] == 'arvados#collection' or portable_data_hash_pattern.match(i['head_uuid']):
-                return CollectionDirectory(self.inode, self.inodes, self.api, self.num_retries, i['head_uuid'])
-            else:
-                return None
-        elif uuid_pattern.match(i['uuid']):
-            return ObjectFile(self.parent_inode, i)
-        else:
-            return None
-
-    def update(self):
-        if self.project_object_file == None:
-            self.project_object_file = ObjectFile(self.inode, self.project_object)
-            self.inodes.add_entry(self.project_object_file)
-
-        def namefn(i):
-            if 'name' in i:
-                if i['name'] is None or len(i['name']) == 0:
-                    return None
-                elif collection_uuid_pattern.match(i['uuid']) or group_uuid_pattern.match(i['uuid']):
-                    # collection or subproject
-                    return i['name']
-                elif link_uuid_pattern.match(i['uuid']) and i['head_kind'] == 'arvados#collection':
-                    # name link
-                    return i['name']
-                elif 'kind' in i and i['kind'].startswith('arvados#'):
-                    # something else
-                    return "{}.{}".format(i['name'], i['kind'][8:])
-            else:
-                return None
-
-        def samefn(a, i):
-            if isinstance(a, CollectionDirectory):
-                return a.collection_locator == i['uuid']
-            elif isinstance(a, ProjectDirectory):
-                return a.uuid == i['uuid']
-            elif isinstance(a, ObjectFile):
-                return a.uuid == i['uuid'] and not a.stale()
-            return False
-
-        with llfuse.lock_released:
-            if group_uuid_pattern.match(self.uuid):
-                self.project_object = self.api.groups().get(
-                    uuid=self.uuid).execute(num_retries=self.num_retries)
-            elif user_uuid_pattern.match(self.uuid):
-                self.project_object = self.api.users().get(
-                    uuid=self.uuid).execute(num_retries=self.num_retries)
-
-            contents = arvados.util.list_all(self.api.groups().contents,
-                                             self.num_retries, uuid=self.uuid)
-            # Name links will be obsolete soon, take this out when there are no more pre-#3036 in use.
-            contents += arvados.util.list_all(
-                self.api.links().list, self.num_retries,
-                filters=[['tail_uuid', '=', self.uuid],
-                         ['link_class', '=', 'name']])
-
-        # end with llfuse.lock_released, re-acquire lock
-
-        self.merge(contents,
-                   namefn,
-                   samefn,
-                   self.createDirectory)
-
-    def __getitem__(self, item):
-        self.checkupdate()
-        if item == '.arvados#project':
-            return self.project_object_file
-        else:
-            return super(ProjectDirectory, self).__getitem__(item)
-
-    def __contains__(self, k):
-        if k == '.arvados#project':
-            return True
+    def cap_cache(self):
+        if self._total > self.cap:
+            for key in list(self._entries.keys()):
+                if self._total < self.cap or len(self._entries) < self.min_entries:
+                    break
+                self._remove(self._entries[key], True)
+
+    def manage(self, obj):
+        if obj.persisted():
+            obj.cache_priority = next(self._counter)
+            obj.cache_size = obj.objsize()
+            self._entries[obj.cache_priority] = obj
+            obj.cache_uuid = obj.uuid()
+            if obj.cache_uuid:
+                if obj.cache_uuid not in self._by_uuid:
+                    self._by_uuid[obj.cache_uuid] = [obj]
+                else:
+                    if obj not in self._by_uuid[obj.cache_uuid]:
+                        self._by_uuid[obj.cache_uuid].append(obj)
+            self._total += obj.objsize()
+            _logger.debug("InodeCache touched %i (size %i) (uuid %s) total now %i", obj.inode, obj.objsize(), obj.cache_uuid, self._total)
+            self.cap_cache()
         else:
-            return super(ProjectDirectory, self).__contains__(k)
+            obj.cache_priority = None
 
+    def touch(self, obj):
+        if obj.persisted():
+            if obj.cache_priority in self._entries:
+                self._remove(obj, False)
+            self.manage(obj)
 
-class SharedDirectory(Directory):
-    '''A special directory that represents users or groups who have shared projects with me.'''
+    def unmanage(self, obj):
+        if obj.persisted() and obj.cache_priority in self._entries:
+            self._remove(obj, True)
 
-    def __init__(self, parent_inode, inodes, api, num_retries, exclude,
-                 poll=False, poll_time=60):
-        super(SharedDirectory, self).__init__(parent_inode)
-        self.inodes = inodes
-        self.api = api
-        self.num_retries = num_retries
-        self.current_user = api.users().current().execute(num_retries=num_retries)
-        self._poll = True
-        self._poll_time = poll_time
-
-    def update(self):
-        with llfuse.lock_released:
-            all_projects = arvados.util.list_all(
-                self.api.groups().list, self.num_retries,
-                filters=[['group_class','=','project']])
-            objects = {}
-            for ob in all_projects:
-                objects[ob['uuid']] = ob
-
-            roots = []
-            root_owners = {}
-            for ob in all_projects:
-                if ob['owner_uuid'] != self.current_user['uuid'] and ob['owner_uuid'] not in objects:
-                    roots.append(ob)
-                    root_owners[ob['owner_uuid']] = True
-
-            lusers = arvados.util.list_all(
-                self.api.users().list, self.num_retries,
-                filters=[['uuid','in', list(root_owners)]])
-            lgroups = arvados.util.list_all(
-                self.api.groups().list, self.num_retries,
-                filters=[['uuid','in', list(root_owners)]])
-
-            users = {}
-            groups = {}
-
-            for l in lusers:
-                objects[l["uuid"]] = l
-            for l in lgroups:
-                objects[l["uuid"]] = l
-
-            contents = {}
-            for r in root_owners:
-                if r in objects:
-                    obr = objects[r]
-                    if "name" in obr:
-                        contents[obr["name"]] = obr
-                    if "first_name" in obr:
-                        contents[u"{} {}".format(obr["first_name"], obr["last_name"])] = obr
-
-            for r in roots:
-                if r['owner_uuid'] not in objects:
-                    contents[r['name']] = r
-
-        # end with llfuse.lock_released, re-acquire lock
-
-        try:
-            self.merge(contents.items(),
-                       lambda i: i[0],
-                       lambda a, i: a.uuid == i[1]['uuid'],
-                       lambda i: ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i[1], poll=self._poll, poll_time=self._poll_time))
-        except Exception:
-            _logger.exception()
-
-
-class FileHandle(object):
-    '''Connects a numeric file handle to a File or Directory object that has
-    been opened by the client.'''
-
-    def __init__(self, fh, entry):
-        self.fh = fh
-        self.entry = entry
+    def find(self, uuid):
+        return self._by_uuid.get(uuid)
 
+    def clear(self):
+        self._entries.clear()
+        self._by_uuid.clear()
+        self._total = 0
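+
+# Illustrative sketch (not part of this commit): 'entries' stands in for
+# FreshBase-like objects that report persisted() == True and whose clear()
+# returns True, so cap_cache() can evict them.
+#
+#   cache = InodeCache(cap=1024, min_entries=2)
+#   for ent in entries:
+#       cache.manage(ent)     # record footprint; may evict LRU entries
+#   cache.touch(entries[-1])  # re-insert at most-recently-used position
+#   cache.find(entries[-1].uuid())  # -> list of objects with that uuid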
 
 class Inodes(object):
-    '''Manage the set of inodes.  This is the mapping from a numeric id
-    to a concrete File or Directory object'''
+    """Manage the set of inodes.  This is the mapping from a numeric id
+    to a concrete File or Directory object"""
 
-    def __init__(self):
+    def __init__(self, inode_cache, encoding="utf-8"):
         self._entries = {}
         self._counter = itertools.count(llfuse.ROOT_INODE)
+        self.inode_cache = inode_cache
+        self.encoding = encoding
+        self.deferred_invalidations = []
 
     def __getitem__(self, item):
         return self._entries[item]
@@ -694,48 +225,160 @@ class Inodes(object):
     def __contains__(self, k):
         return k in self._entries
 
+    def touch(self, entry):
+        entry._atime = time.time()
+        self.inode_cache.touch(entry)
+
     def add_entry(self, entry):
         entry.inode = next(self._counter)
+        if entry.inode == llfuse.ROOT_INODE:
+            entry.inc_ref()
         self._entries[entry.inode] = entry
+        self.inode_cache.manage(entry)
         return entry
 
     def del_entry(self, entry):
-        llfuse.invalidate_inode(entry.inode)
-        del self._entries[entry.inode]
+        if entry.ref_count == 0:
+            self.inode_cache.unmanage(entry)
+            del self._entries[entry.inode]
+            with llfuse.lock_released:
+                entry.finalize()
+            self.invalidate_inode(entry.inode)
+            entry.inode = None
+        else:
+            entry.dead = True
+            _logger.debug("del_entry on inode %i with refcount %i", entry.inode, entry.ref_count)
+
+    def invalidate_inode(self, inode):
+        llfuse.invalidate_inode(inode)
+
+    def invalidate_entry(self, inode, name):
+        llfuse.invalidate_entry(inode, name)
+
+    def clear(self):
+        self.inode_cache.clear()
+
+        for k, v in self._entries.items():
+            try:
+                v.finalize()
+            except Exception:
+                _logger.exception("Error during finalize of inode %i", k)
+
+        self._entries.clear()
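+
+# Illustrative sketch (not part of this commit) of the entry lifecycle as
+# driven by Operations; 'ent' stands in for a File or Directory object.
+#
+#   inodes = Inodes(InodeCache(cap=256*1024*1024))
+#   inodes.add_entry(ent)    # assigns ent.inode = next free inode number
+#   inodes.touch(ent)        # update atime and inode cache position
+#   inodes.del_entry(ent)    # finalize now if ref_count == 0, otherwise
+#                            # mark dead until the kernel forget()s it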
+
+
+def catch_exceptions(orig_func):
+    """Catch uncaught exceptions and log them consistently."""
+
+    @functools.wraps(orig_func)
+    def catch_exceptions_wrapper(self, *args, **kwargs):
+        try:
+            return orig_func(self, *args, **kwargs)
+        except llfuse.FUSEError:
+            raise
+        except EnvironmentError as e:
+            raise llfuse.FUSEError(e.errno)
+        except arvados.errors.KeepWriteError as e:
+            _logger.error("Keep write error: " + str(e))
+            raise llfuse.FUSEError(errno.EIO)
+        except arvados.errors.NotFoundError as e:
+            _logger.error("Block not found error: " + str(e))
+            raise llfuse.FUSEError(errno.EIO)
+        except:
+            _logger.exception("Unhandled exception during FUSE operation")
+            raise llfuse.FUSEError(errno.EIO)
+
+    return catch_exceptions_wrapper
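+
+# Sketch of the decorator in use (illustrative, not part of this commit):
+# EnvironmentError surfaces to the kernel as its own errno, known Arvados
+# errors map to EIO, and anything unexpected is logged and becomes EIO
+# instead of killing the FUSE request thread.
+#
+#   class ExampleOps(llfuse.Operations):
+#       @catch_exceptions
+#       def getattr(self, inode):
+#           raise OSError(errno.ENOENT, "no such inode")
+#           # -> re-raised as llfuse.FUSEError(errno.ENOENT)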
+
 
 class Operations(llfuse.Operations):
-    '''This is the main interface with llfuse.  The methods on this object are
-    called by llfuse threads to service FUSE events to query and read from
-    the file system.
+    """This is the main interface with llfuse.
+
+    The methods on this object are called by llfuse threads to service FUSE
+    events to query and read from the file system.
 
     llfuse has its own global lock which is acquired before calling a request handler,
     so request handlers do not run concurrently unless the lock is explicitly released
-    using "with llfuse.lock_released:"'''
+    using 'with llfuse.lock_released:'
 
-    def __init__(self, uid, gid, encoding="utf-8"):
+    """
+
+    def __init__(self, uid, gid, encoding="utf-8", inode_cache=None, num_retries=4, enable_write=False):
         super(Operations, self).__init__()
 
-        self.inodes = Inodes()
+        if not inode_cache:
+            inode_cache = InodeCache(cap=256*1024*1024)
+        self.inodes = Inodes(inode_cache, encoding=encoding)
         self.uid = uid
         self.gid = gid
-        self.encoding = encoding
+        self.enable_write = enable_write
 
         # dict of inode to filehandle
         self._filehandles = {}
-        self._filehandles_counter = 1
+        self._filehandles_counter = itertools.count(0)
 
         # Other threads that need to wait until the fuse driver
         # is fully initialized should wait() on this event object.
         self.initlock = threading.Event()
 
+        self.num_retries = num_retries
+
+        self.events = None
+
     def init(self):
         # Allow threads that are waiting for the driver to be finished
         # initializing to continue
         self.initlock.set()
 
+    @catch_exceptions
+    def destroy(self):
+        if self.events:
+            self.events.close()
+            self.events = None
+
+        self.inodes.clear()
+
     def access(self, inode, mode, ctx):
         return True
 
+    def listen_for_events(self, api_client):
+        self.events = arvados.events.subscribe(api_client,
+                                 [["event_type", "in", ["create", "update", "delete"]]],
+                                 self.on_event)
+
+    @catch_exceptions
+    def on_event(self, ev):
+        if 'event_type' in ev:
+            with llfuse.lock:
+                items = self.inodes.inode_cache.find(ev["object_uuid"])
+                if items is not None:
+                    for item in items:
+                        item.invalidate()
+                        if ev["object_kind"] == "arvados#collection":
+                            new_attr = ev.get("properties") and ev["properties"].get("new_attributes") and ev["properties"]["new_attributes"]
+
+                            # new_attributes.modified_at currently lacks subsecond precision (see #6347) so use event_at which
+                            # should always be the same.
+                            #record_version = (new_attr["modified_at"], new_attr["portable_data_hash"]) if new_attr else None
+                            record_version = (ev["event_at"], new_attr["portable_data_hash"]) if new_attr else None
+
+                            item.update(to_record_version=record_version)
+                        else:
+                            item.update()
+
+                oldowner = ev.get("properties") and ev["properties"].get("old_attributes") and ev["properties"]["old_attributes"].get("owner_uuid")
+                olditemparent = self.inodes.inode_cache.find(oldowner)
+                if olditemparent is not None:
+                    olditemparent.invalidate()
+                    olditemparent.update()
+
+                itemparent = self.inodes.inode_cache.find(ev["object_owner_uuid"])
+                if itemparent is not None:
+                    itemparent.invalidate()
+                    itemparent.update()
+
+
+    @catch_exceptions
     def getattr(self, inode):
         if inode not in self.inodes:
             raise llfuse.FUSEError(errno.ENOENT)
@@ -745,16 +388,19 @@ class Operations(llfuse.Operations):
         entry = llfuse.EntryAttributes()
         entry.st_ino = inode
         entry.generation = 0
-        entry.entry_timeout = 300
-        entry.attr_timeout = 300
+        entry.entry_timeout = 60
+        entry.attr_timeout = 60
 
         entry.st_mode = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
         if isinstance(e, Directory):
             entry.st_mode |= stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH | stat.S_IFDIR
-        elif isinstance(e, StreamReaderFile):
-            entry.st_mode |= stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH | stat.S_IFREG
         else:
             entry.st_mode |= stat.S_IFREG
+            if isinstance(e, FuseArvadosFile):
+                entry.st_mode |= stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH
+
+        if self.enable_write and e.writable():
+            entry.st_mode |= stat.S_IWUSR | stat.S_IWGRP | stat.S_IWOTH
 
         entry.st_nlink = 1
         entry.st_uid = self.uid
@@ -764,17 +410,29 @@ class Operations(llfuse.Operations):
         entry.st_size = e.size()
 
         entry.st_blksize = 512
-        entry.st_blocks = (e.size()/512)+1
+        entry.st_blocks = (entry.st_size/512)+1
         entry.st_atime = int(e.atime())
         entry.st_mtime = int(e.mtime())
         entry.st_ctime = int(e.mtime())
 
         return entry
 
+    @catch_exceptions
+    def setattr(self, inode, attr):
+        entry = self.getattr(inode)
+
+        e = self.inodes[inode]
+
+        if attr.st_size is not None and isinstance(e, FuseArvadosFile):
+            with llfuse.lock_released:
+                e.arvfile.truncate(attr.st_size)
+                entry.st_size = e.arvfile.size()
+
+        return entry
+
+    @catch_exceptions
     def lookup(self, parent_inode, name):
-        name = unicode(name, self.encoding)
-        _logger.debug("arv-mount lookup: parent_inode %i name %s",
-                      parent_inode, name)
+        name = unicode(name, self.inodes.encoding)
         inode = None
 
         if name == '.':
@@ -788,27 +446,42 @@ class Operations(llfuse.Operations):
                     inode = p[name].inode
 
         if inode != None:
+            _logger.debug("arv-mount lookup: parent_inode %i name '%s' inode %i",
+                      parent_inode, name, inode)
+            self.inodes[inode].inc_ref()
             return self.getattr(inode)
         else:
+            _logger.debug("arv-mount lookup: parent_inode %i name '%s' not found",
+                      parent_inode, name)
             raise llfuse.FUSEError(errno.ENOENT)
 
+    @catch_exceptions
+    def forget(self, inodes):
+        for inode, nlookup in inodes:
+            ent = self.inodes[inode]
+            _logger.debug("arv-mount forget: inode %i nlookup %i ref_count %i", inode, nlookup, ent.ref_count)
+            if ent.dec_ref(nlookup) == 0 and ent.dead:
+                self.inodes.del_entry(ent)
+
+    @catch_exceptions
     def open(self, inode, flags):
         if inode in self.inodes:
             p = self.inodes[inode]
         else:
             raise llfuse.FUSEError(errno.ENOENT)
 
-        if (flags & os.O_WRONLY) or (flags & os.O_RDWR):
-            raise llfuse.FUSEError(errno.EROFS)
-
         if isinstance(p, Directory):
             raise llfuse.FUSEError(errno.EISDIR)
 
-        fh = self._filehandles_counter
-        self._filehandles_counter += 1
+        if ((flags & os.O_WRONLY) or (flags & os.O_RDWR)) and not p.writable():
+            raise llfuse.FUSEError(errno.EPERM)
+
+        fh = next(self._filehandles_counter)
         self._filehandles[fh] = FileHandle(fh, p)
+        self.inodes.touch(p)
         return fh
 
+    @catch_exceptions
     def read(self, fh, off, size):
         _logger.debug("arv-mount read %i %i %i", fh, off, size)
         if fh in self._filehandles:
@@ -816,23 +489,41 @@ class Operations(llfuse.Operations):
         else:
             raise llfuse.FUSEError(errno.EBADF)
 
-        # update atime
-        handle.entry._atime = time.time()
+        self.inodes.touch(handle.obj)
 
-        try:
-            with llfuse.lock_released:
-                return handle.entry.readfrom(off, size)
-        except arvados.errors.NotFoundError as e:
-            _logger.warning("Block not found: " + str(e))
-            raise llfuse.FUSEError(errno.EIO)
-        except Exception:
-            _logger.exception()
-            raise llfuse.FUSEError(errno.EIO)
+        return handle.obj.readfrom(off, size, self.num_retries)
+
+    @catch_exceptions
+    def write(self, fh, off, buf):
+        _logger.debug("arv-mount write %i %i %i", fh, off, len(buf))
+        if fh in self._filehandles:
+            handle = self._filehandles[fh]
+        else:
+            raise llfuse.FUSEError(errno.EBADF)
+
+        if not handle.obj.writable():
+            raise llfuse.FUSEError(errno.EPERM)
 
+        self.inodes.touch(handle.obj)
+
+        return handle.obj.writeto(off, buf, self.num_retries)
+
+    @catch_exceptions
     def release(self, fh):
         if fh in self._filehandles:
-            del self._filehandles[fh]
+            try:
+                self._filehandles[fh].flush()
+            finally:
+                self._filehandles[fh].release()
+                del self._filehandles[fh]
+        self.inodes.inode_cache.cap_cache()
+
+    def releasedir(self, fh):
+        self.release(fh)
 
+    @catch_exceptions
     def opendir(self, inode):
         _logger.debug("arv-mount opendir: inode %i", inode)
 
@@ -844,19 +535,19 @@ class Operations(llfuse.Operations):
         if not isinstance(p, Directory):
             raise llfuse.FUSEError(errno.ENOTDIR)
 
-        fh = self._filehandles_counter
-        self._filehandles_counter += 1
+        fh = next(self._filehandles_counter)
         if p.parent_inode in self.inodes:
             parent = self.inodes[p.parent_inode]
         else:
             raise llfuse.FUSEError(errno.EIO)
 
         # update atime
-        p._atime = time.time()
+        self.inodes.touch(p)
 
-        self._filehandles[fh] = FileHandle(fh, [('.', p), ('..', parent)] + list(p.items()))
+        self._filehandles[fh] = DirectoryHandle(fh, p, [('.', p), ('..', parent)] + list(p.items()))
         return fh
 
+    @catch_exceptions
     def readdir(self, fh, off):
         _logger.debug("arv-mount readdir: fh %i off %i", fh, off)
 
@@ -865,23 +556,18 @@ class Operations(llfuse.Operations):
         else:
             raise llfuse.FUSEError(errno.EBADF)
 
-        _logger.debug("arv-mount handle.entry %s", handle.entry)
+        _logger.debug("arv-mount handle.dirobj %s", handle.obj)
 
         e = off
-        while e < len(handle.entry):
-            if handle.entry[e][1].inode in self.inodes:
-                try:
-                    yield (handle.entry[e][0].encode(self.encoding), self.getattr(handle.entry[e][1].inode), e+1)
-                except UnicodeEncodeError:
-                    pass
+        while e < len(handle.entries):
+            if handle.entries[e][1].inode in self.inodes:
+                yield (handle.entries[e][0].encode(self.inodes.encoding), self.getattr(handle.entries[e][1].inode), e+1)
             e += 1
 
-    def releasedir(self, fh):
-        del self._filehandles[fh]
-
+    @catch_exceptions
     def statfs(self):
         st = llfuse.StatvfsData()
-        st.f_bsize = 64 * 1024
+        st.f_bsize = 128 * 1024
         st.f_blocks = 0
         st.f_files = 0
 
@@ -894,12 +580,78 @@ class Operations(llfuse.Operations):
         st.f_frsize = 0
         return st
 
-    # The llfuse documentation recommends only overloading functions that
-    # are actually implemented, as the default implementation will raise ENOSYS.
-    # However, there is a bug in the llfuse default implementation of create()
-    # "create() takes exactly 5 positional arguments (6 given)" which will crash
-    # arv-mount.
-    # The workaround is to implement it with the proper number of parameters,
-    # and then everything works out.
+    def _check_writable(self, inode_parent):
+        if not self.enable_write:
+            raise llfuse.FUSEError(errno.EROFS)
+
+        if inode_parent in self.inodes:
+            p = self.inodes[inode_parent]
+        else:
+            raise llfuse.FUSEError(errno.ENOENT)
+
+        if not isinstance(p, Directory):
+            raise llfuse.FUSEError(errno.ENOTDIR)
+
+        if not p.writable():
+            raise llfuse.FUSEError(errno.EPERM)
+
+        return p
+
+    @catch_exceptions
     def create(self, inode_parent, name, mode, flags, ctx):
-        raise llfuse.FUSEError(errno.EROFS)
+        _logger.debug("arv-mount create: %i '%s' %o", inode_parent, name, mode)
+
+        p = self._check_writable(inode_parent)
+        p.create(name)
+
+        # The file entry should have been implicitly created by callback.
+        f = p[name]
+        fh = next(self._filehandles_counter)
+        self._filehandles[fh] = FileHandle(fh, f)
+        self.inodes.touch(p)
+
+        f.inc_ref()
+        return (fh, self.getattr(f.inode))
+
+    @catch_exceptions
+    def mkdir(self, inode_parent, name, mode, ctx):
+        _logger.debug("arv-mount mkdir: %i '%s' %o", inode_parent, name, mode)
+
+        p = self._check_writable(inode_parent)
+        p.mkdir(name)
+
+        # The dir entry should have been implicitly created by callback.
+        d = p[name]
+
+        d.inc_ref()
+        return self.getattr(d.inode)
+
+    @catch_exceptions
+    def unlink(self, inode_parent, name):
+        _logger.debug("arv-mount unlink: %i '%s'", inode_parent, name)
+        p = self._check_writable(inode_parent)
+        p.unlink(name)
+
+    @catch_exceptions
+    def rmdir(self, inode_parent, name):
+        _logger.debug("arv-mount rmdir: %i '%s'", inode_parent, name)
+        p = self._check_writable(inode_parent)
+        p.rmdir(name)
+
+    @catch_exceptions
+    def rename(self, inode_parent_old, name_old, inode_parent_new, name_new):
+        _logger.debug("arv-mount rename: %i '%s' %i '%s'", inode_parent_old, name_old, inode_parent_new, name_new)
+        src = self._check_writable(inode_parent_old)
+        dest = self._check_writable(inode_parent_new)
+        dest.rename(name_old, name_new, src)
+
+    @catch_exceptions
+    def flush(self, fh):
+        if fh in self._filehandles:
+            self._filehandles[fh].flush()
+
+    def fsync(self, fh, datasync):
+        self.flush(fh)
+
+    def fsyncdir(self, fh, datasync):
+        self.flush(fh)
diff --git a/services/fuse/arvados_fuse/fresh.py b/services/fuse/arvados_fuse/fresh.py
new file mode 100644 (file)
index 0000000..ec2d47a
--- /dev/null
@@ -0,0 +1,120 @@
+import time
+import ciso8601
+import calendar
+import functools
+
+def convertTime(t):
+    """Parse Arvados timestamp to unix time."""
+    if not t:
+        return 0
+    try:
+        return calendar.timegm(ciso8601.parse_datetime_unaware(t).timetuple())
+    except (TypeError, ValueError):
+        return 0
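+
+# Examples (illustrative; assumes ciso8601 accepts the trailing-'Z' form,
+# which calendar.timegm then treats as UTC):
+#
+#   convertTime("2015-09-04T09:55:37Z")  # -> 1441360537
+#   convertTime("")                      # -> 0 (falsy input)
+#   convertTime(None)                    # -> 0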
+
+def use_counter(orig_func):
+    @functools.wraps(orig_func)
+    def use_counter_wrapper(self, *args, **kwargs):
+        try:
+            self.inc_use()
+            return orig_func(self, *args, **kwargs)
+        finally:
+            self.dec_use()
+    return use_counter_wrapper
+
+def check_update(orig_func):
+    @functools.wraps(orig_func)
+    def check_update_wrapper(self, *args, **kwargs):
+        self.checkupdate()
+        return orig_func(self, *args, **kwargs)
+    return check_update_wrapper
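+
+# Sketch (illustrative): in this commit the two decorators are stacked, so a
+# directory access first pins the object against cache eviction, then
+# refreshes stale contents before serving the request.
+#
+#   class ExampleDirectory(FreshBase):
+#       @use_counter
+#       @check_update
+#       def __getitem__(self, item):
+#           return self._entries[item]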
+
+class FreshBase(object):
+    """Base class for maintaining object lifecycle.
+
+    Functions include:
+
+    * Indicate if an object is up to date (stale() == False) or needs to be
+      updated (stale() == True).  Use invalidate() to mark the object as
+      stale.  An object is also automatically stale if it has not been updated
+      in `_poll_time` seconds.
+
+    * Record access time (atime) timestamp
+
+    * Manage internal use count used by the inode cache ("inc_use" and
+      "dec_use").  An object which is in use cannot be cleared by the inode
+      cache.
+
+    * Manage the kernel reference count ("inc_ref" and "dec_ref").  An object
+      which is referenced by the kernel cannot have its inode entry deleted.
+
+    * Record cache footprint, cache priority
+
+    * Record Arvados uuid at the time the object is placed in the cache
+
+    * Clear the object contents (invalidates the object)
+
+    """
+    def __init__(self):
+        self._stale = True
+        self._poll = False
+        self._last_update = time.time()
+        self._atime = time.time()
+        self._poll_time = 60
+        self.use_count = 0
+        self.ref_count = 0
+        self.dead = False
+        self.cache_priority = None
+        self.cache_size = 0
+        self.cache_uuid = None
+
+    # Mark the object as stale
+    def invalidate(self):
+        self._stale = True
+
+    # Test if the object is stale.
+    def stale(self):
+        if self._stale:
+            return True
+        if self._poll:
+            return (self._last_update + self._poll_time) < self._atime
+        return False
+
+    def fresh(self):
+        self._stale = False
+        self._last_update = time.time()
+
+    def atime(self):
+        return self._atime
+
+    def persisted(self):
+        return False
+
+    def clear(self, force=False):
+        pass
+
+    def in_use(self):
+        return self.use_count > 0
+
+    def inc_use(self):
+        self.use_count += 1
+
+    def dec_use(self):
+        self.use_count -= 1
+
+    def inc_ref(self):
+        self.ref_count += 1
+        return self.ref_count
+
+    def dec_ref(self, n):
+        self.ref_count -= n
+        return self.ref_count
+
+    def objsize(self):
+        return 0
+
+    def uuid(self):
+        return None
+
+    def finalize(self):
+        pass
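+
+# Illustrative lifecycle (not part of this commit):
+#
+#   f = FreshBase()
+#   f.stale()        # -> True: objects start stale and must be updated
+#   f.fresh()        # mark contents up to date
+#   f.stale()        # -> False (no polling configured)
+#   f.invalidate()   # force a refresh on the next checkupdate()
+#   f.stale()        # -> True again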
diff --git a/services/fuse/arvados_fuse/fusedir.py b/services/fuse/arvados_fuse/fusedir.py
new file mode 100644 (file)
index 0000000..8ffca49
--- /dev/null
@@ -0,0 +1,849 @@
+import logging
+import re
+import time
+import llfuse
+import arvados
+import apiclient
+import functools
+import threading
+from apiclient import errors as apiclient_errors
+import errno
+
+from fusefile import StringFile, ObjectFile, FuseArvadosFile
+from fresh import FreshBase, convertTime, use_counter, check_update
+
+import arvados.collection
+from arvados.util import portable_data_hash_pattern, uuid_pattern, collection_uuid_pattern, group_uuid_pattern, user_uuid_pattern, link_uuid_pattern
+
+_logger = logging.getLogger('arvados.arvados_fuse')
+
+
+# Match any character which FUSE or Linux cannot accommodate as part
+# of a filename. (If present in a collection filename, they will
+# appear as underscores in the fuse mount.)
+_disallowed_filename_characters = re.compile('[\x00/]')
+
+# '.' and '..' are not reachable if API server is newer than #6277
+def sanitize_filename(dirty):
+    """Replace disallowed filename characters with harmless "_"."""
+    if dirty is None:
+        return None
+    elif dirty == '':
+        return '_'
+    elif dirty == '.':
+        return '_'
+    elif dirty == '..':
+        return '__'
+    else:
+        return _disallowed_filename_characters.sub('_', dirty)
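+
+# Examples (each derived from the rules above):
+#
+#   sanitize_filename(None)        # -> None
+#   sanitize_filename('')          # -> '_'
+#   sanitize_filename('.')         # -> '_'
+#   sanitize_filename('..')        # -> '__'
+#   sanitize_filename('a/b\x00c')  # -> 'a_b_c'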
+
+
+class Directory(FreshBase):
+    """Generic directory object, backed by a dict.
+
+    Consists of a set of entries with the key representing the filename
+    and the value referencing a File or Directory object.
+    """
+
+    def __init__(self, parent_inode, inodes):
+        """parent_inode is the integer inode number"""
+
+        super(Directory, self).__init__()
+
+        self.inode = None
+        if not isinstance(parent_inode, int):
+            raise Exception("parent_inode should be an int")
+        self.parent_inode = parent_inode
+        self.inodes = inodes
+        self._entries = {}
+        self._mtime = time.time()
+
+    # Overridden by subclasses to implement logic to update the entries dict
+    # when the directory is stale.
+    @use_counter
+    def update(self):
+        pass
+
+    # Only used when computing the size of the disk footprint of the directory
+    # (stub)
+    def size(self):
+        return 0
+
+    def persisted(self):
+        return False
+
+    def checkupdate(self):
+        if self.stale():
+            try:
+                self.update()
+            except apiclient.errors.HttpError as e:
+                _logger.warn(e)
+
+    @use_counter
+    @check_update
+    def __getitem__(self, item):
+        return self._entries[item]
+
+    @use_counter
+    @check_update
+    def items(self):
+        return list(self._entries.items())
+
+    @use_counter
+    @check_update
+    def __contains__(self, k):
+        return k in self._entries
+
+    @use_counter
+    @check_update
+    def __len__(self):
+        return len(self._entries)
+
+    def fresh(self):
+        self.inodes.touch(self)
+        super(Directory, self).fresh()
+
+    def merge(self, items, fn, same, new_entry):
+        """Helper method for updating the contents of the directory.
+
+        Takes a list describing the new contents of the directory, reuses
+        entries that are the same in both the old and new lists, creates new
+        entries, and deletes old entries missing from the new list.
+
+        :items: iterable with new directory contents
+
+        :fn: function to take an entry in 'items' and return the desired file or
+        directory name, or None if this entry should be skipped
+
+        :same: function to compare an existing entry (a File or Directory
+        object) with an entry in the items list to determine whether to keep
+        the existing entry.
+
+        :new_entry: function to create a new directory entry (File or Directory
+        object) from an entry in the items list.
+
+        """
+
+        oldentries = self._entries
+        self._entries = {}
+        changed = False
+        for i in items:
+            name = sanitize_filename(fn(i))
+            if name:
+                if name in oldentries and same(oldentries[name], i):
+                    # move existing directory entry over
+                    self._entries[name] = oldentries[name]
+                    del oldentries[name]
+                else:
+                    _logger.debug("Adding entry '%s' to inode %i", name, self.inode)
+                    # create new directory entry
+                    ent = new_entry(i)
+                    if ent is not None:
+                        self._entries[name] = self.inodes.add_entry(ent)
+                        changed = True
+
+        # delete any other directory entries that were not found in 'items'
+        for i in oldentries:
+            _logger.debug("Forgetting about entry '%s' on inode %i", i, self.inode)
+            self.inodes.invalidate_entry(self.inode, i.encode(self.inodes.encoding))
+            self.inodes.del_entry(oldentries[i])
+            changed = True
+
+        if changed:
+            self.inodes.invalidate_inode(self.inode)
+            self._mtime = time.time()
+
+        self.fresh()
+
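+    # Sketch of a typical merge() call (illustrative, not part of this
+    # commit), shaped like the tag directory update elsewhere in this change;
+    # 'api_items' and SomeDirectory (a Directory subclass keyed on the
+    # record's uuid) are hypothetical.
+    #
+    #   self.merge(api_items,
+    #              lambda i: i['name'],               # fn: entry name
+    #              lambda a, i: a.uuid == i['uuid'],  # same: reuse check
+    #              lambda i: SomeDirectory(self.inode, self.inodes, i))
+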
+    def clear(self, force=False):
+        """Delete all entries"""
+
+        if not self.in_use() or force:
+            oldentries = self._entries
+            self._entries = {}
+            for n in oldentries:
+                if not oldentries[n].clear(force):
+                    self._entries = oldentries
+                    return False
+            for n in oldentries:
+                self.inodes.invalidate_entry(self.inode, n.encode(self.inodes.encoding))
+                self.inodes.del_entry(oldentries[n])
+            self.inodes.invalidate_inode(self.inode)
+            self.invalidate()
+            return True
+        else:
+            return False
+
+    def mtime(self):
+        return self._mtime
+
+    def writable(self):
+        return False
+
+    def flush(self):
+        pass
+
+    def create(self, name):
+        raise NotImplementedError()
+
+    def mkdir(self, name):
+        raise NotImplementedError()
+
+    def unlink(self, name):
+        raise NotImplementedError()
+
+    def rmdir(self, name):
+        raise NotImplementedError()
+
+    def rename(self, name_old, name_new, src):
+        raise NotImplementedError()
+
+
+class CollectionDirectoryBase(Directory):
+    """Represent an Arvados Collection as a directory.
+
+    This class is used for Subcollections, and is also the base class for
+    CollectionDirectory, which implements collection loading/saving on
+    Collection records.
+
+    Most operations act only on the underlying Arvados `Collection` object.  The
+    `Collection` object signals via a notify callback to
+    `CollectionDirectoryBase.on_event` that an item was added, removed or
+    modified.  FUSE inodes and directory entries are created, deleted or
+    invalidated in response to these events.
+
+    """
+
+    def __init__(self, parent_inode, inodes, collection):
+        super(CollectionDirectoryBase, self).__init__(parent_inode, inodes)
+        self.collection = collection
+
+    def new_entry(self, name, item, mtime):
+        name = sanitize_filename(name)
+        if hasattr(item, "fuse_entry") and item.fuse_entry is not None:
+            if item.fuse_entry.dead is not True:
+                raise Exception("Can only reparent dead inode entry")
+            if item.fuse_entry.inode is None:
+                raise Exception("Reparented entry must still have valid inode")
+            item.fuse_entry.dead = False
+            self._entries[name] = item.fuse_entry
+        elif isinstance(item, arvados.collection.RichCollectionBase):
+            self._entries[name] = self.inodes.add_entry(CollectionDirectoryBase(self.inode, self.inodes, item))
+            self._entries[name].populate(mtime)
+        else:
+            self._entries[name] = self.inodes.add_entry(FuseArvadosFile(self.inode, item, mtime))
+        item.fuse_entry = self._entries[name]
+
+    def on_event(self, event, collection, name, item):
+        if collection == self.collection:
+            name = sanitize_filename(name)
+            _logger.debug("collection notify %s %s %s %s", event, collection, name, item)
+            with llfuse.lock:
+                if event == arvados.collection.ADD:
+                    self.new_entry(name, item, self.mtime())
+                elif event == arvados.collection.DEL:
+                    ent = self._entries[name]
+                    del self._entries[name]
+                    self.inodes.invalidate_entry(self.inode, name.encode(self.inodes.encoding))
+                    self.inodes.del_entry(ent)
+                elif event == arvados.collection.MOD:
+                    if hasattr(item, "fuse_entry") and item.fuse_entry is not None:
+                        self.inodes.invalidate_inode(item.fuse_entry.inode)
+                    elif name in self._entries:
+                        self.inodes.invalidate_inode(self._entries[name].inode)
+
+    def populate(self, mtime):
+        self._mtime = mtime
+        self.collection.subscribe(self.on_event)
+        for entry, item in self.collection.items():
+            self.new_entry(entry, item, self.mtime())
+
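+    # Sketch (illustrative, not part of this commit): populate() subscribes
+    # on_event to the underlying Collection, so mutations made through the
+    # SDK surface as FUSE entries; 'coll', 'parent_inode' and 'inodes' stand
+    # in for a real writable arvados.collection.Collection and its context.
+    #
+    #   d = CollectionDirectoryBase(parent_inode, inodes, coll)
+    #   d.populate(time.time())            # wires coll.subscribe(d.on_event)
+    #   coll.open("new.txt", "w").close()  # ADD event -> new_entry() creates
+    #                                      # the matching FUSE inode
+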
+    def writable(self):
+        return self.collection.writable()
+
+    @use_counter
+    def flush(self):
+        with llfuse.lock_released:
+            self.collection.root_collection().save()
+
+    @use_counter
+    @check_update
+    def create(self, name):
+        with llfuse.lock_released:
+            self.collection.open(name, "w").close()
+
+    @use_counter
+    @check_update
+    def mkdir(self, name):
+        with llfuse.lock_released:
+            self.collection.mkdirs(name)
+
+    @use_counter
+    @check_update
+    def unlink(self, name):
+        with llfuse.lock_released:
+            self.collection.remove(name)
+        self.flush()
+
+    @use_counter
+    @check_update
+    def rmdir(self, name):
+        with llfuse.lock_released:
+            self.collection.remove(name)
+        self.flush()
+
+    @use_counter
+    @check_update
+    def rename(self, name_old, name_new, src):
+        if not isinstance(src, CollectionDirectoryBase):
+            raise llfuse.FUSEError(errno.EPERM)
+
+        if name_new in self:
+            ent = src[name_old]
+            tgt = self[name_new]
+            if isinstance(ent, FuseArvadosFile) and isinstance(tgt, FuseArvadosFile):
+                pass
+            elif isinstance(ent, CollectionDirectoryBase) and isinstance(tgt, CollectionDirectoryBase):
+                if len(tgt) > 0:
+                    raise llfuse.FUSEError(errno.ENOTEMPTY)
+            elif isinstance(ent, CollectionDirectoryBase) and isinstance(tgt, FuseArvadosFile):
+                raise llfuse.FUSEError(errno.ENOTDIR)
+            elif isinstance(ent, FuseArvadosFile) and isinstance(tgt, CollectionDirectoryBase):
+                raise llfuse.FUSEError(errno.EISDIR)
+
+        with llfuse.lock_released:
+            self.collection.rename(name_old, name_new, source_collection=src.collection, overwrite=True)
+        self.flush()
+        src.flush()
+
+
+class CollectionDirectory(CollectionDirectoryBase):
+    """Represents the root of a directory tree representing a collection."""
+
+    def __init__(self, parent_inode, inodes, api, num_retries, collection_record=None, explicit_collection=None):
+        super(CollectionDirectory, self).__init__(parent_inode, inodes, None)
+        self.api = api
+        self.num_retries = num_retries
+        self.collection_record_file = None
+        self.collection_record = None
+        if isinstance(collection_record, dict):
+            self.collection_locator = collection_record['uuid']
+            self._mtime = convertTime(collection_record.get('modified_at'))
+        else:
+            self.collection_locator = collection_record
+            self._mtime = 0
+        self._manifest_size = 0
+        # Default to read-only: _writable must exist even when no locator was
+        # given; only a uuid locator (not a portable data hash) is writable.
+        self._writable = False
+        if self.collection_locator:
+            self._writable = (uuid_pattern.match(self.collection_locator) is not None)
+        self._updating_lock = threading.Lock()
+
+    def same(self, i):
+        return i['uuid'] == self.collection_locator or i['portable_data_hash'] == self.collection_locator
+
+    def writable(self):
+        return self.collection.writable() if self.collection is not None else self._writable
+
+    # Used by arv-web.py to switch the contents of the CollectionDirectory
+    def change_collection(self, new_locator):
+        """Switch the contents of the CollectionDirectory.
+
+        Must be called with llfuse.lock held.
+        """
+
+        self.collection_locator = new_locator
+        self.collection_record = None
+        self.update()
+
+    def new_collection(self, new_collection_record, coll_reader):
+        if self.inode:
+            self.clear(force=True)
+
+        self.collection_record = new_collection_record
+
+        if self.collection_record:
+            self._mtime = convertTime(self.collection_record.get('modified_at'))
+            self.collection_locator = self.collection_record["uuid"]
+            if self.collection_record_file is not None:
+                self.collection_record_file.update(self.collection_record)
+
+        self.collection = coll_reader
+        self.populate(self.mtime())
+
+    def uuid(self):
+        return self.collection_locator
+
+    @use_counter
+    def update(self, to_record_version=None):
+        try:
+            if self.collection_record is not None and portable_data_hash_pattern.match(self.collection_locator):
+                return True
+
+            if self.collection_locator is None:
+                self.fresh()
+                return True
+
+            try:
+                with llfuse.lock_released:
+                    self._updating_lock.acquire()
+                    if not self.stale():
+                        return
+
+                    _logger.debug("Updating %s", to_record_version)
+                    if self.collection is not None:
+                        if self.collection.known_past_version(to_record_version):
+                            _logger.debug("%s already processed %s", self.collection_locator, to_record_version)
+                        else:
+                            self.collection.update()
+                    else:
+                        if uuid_pattern.match(self.collection_locator):
+                            coll_reader = arvados.collection.Collection(
+                                self.collection_locator, self.api, self.api.keep,
+                                num_retries=self.num_retries)
+                        else:
+                            coll_reader = arvados.collection.CollectionReader(
+                                self.collection_locator, self.api, self.api.keep,
+                                num_retries=self.num_retries)
+                        new_collection_record = coll_reader.api_response() or {}
+                        # If the Collection only exists in Keep, there will be no API
+                        # response.  Fill in the fields we need.
+                        if 'uuid' not in new_collection_record:
+                            new_collection_record['uuid'] = self.collection_locator
+                        if "portable_data_hash" not in new_collection_record:
+                            new_collection_record["portable_data_hash"] = new_collection_record["uuid"]
+                        if 'manifest_text' not in new_collection_record:
+                            new_collection_record['manifest_text'] = coll_reader.manifest_text()
+
+                        if self.collection_record is None or self.collection_record["portable_data_hash"] != new_collection_record.get("portable_data_hash"):
+                            self.new_collection(new_collection_record, coll_reader)
+
+                        self._manifest_size = len(coll_reader.manifest_text())
+                        _logger.debug("%s manifest_size %i", self, self._manifest_size)
+                # end with llfuse.lock_released, re-acquire lock
+
+                self.fresh()
+                return True
+            finally:
+                self._updating_lock.release()
+        except arvados.errors.NotFoundError as e:
+            _logger.error("Error fetching collection '%s': %s", self.collection_locator, e)
+        except arvados.errors.ArgumentError as detail:
+            _logger.warning("arv-mount %s: error %s", self.collection_locator, detail)
+            if self.collection_record is not None and "manifest_text" in self.collection_record:
+                _logger.warning("arv-mount manifest_text is: %s", self.collection_record["manifest_text"])
+        except Exception:
+            _logger.exception("arv-mount %s: error", self.collection_locator)
+            if self.collection_record is not None and "manifest_text" in self.collection_record:
+                _logger.error("arv-mount manifest_text is: %s", self.collection_record["manifest_text"])
+        return False
+
+    @use_counter
+    @check_update
+    def __getitem__(self, item):
+        if item == '.arvados#collection':
+            if self.collection_record_file is None:
+                self.collection_record_file = ObjectFile(self.inode, self.collection_record)
+                self.inodes.add_entry(self.collection_record_file)
+            return self.collection_record_file
+        else:
+            return super(CollectionDirectory, self).__getitem__(item)
+
+    def __contains__(self, k):
+        if k == '.arvados#collection':
+            return True
+        else:
+            return super(CollectionDirectory, self).__contains__(k)
+
+    def invalidate(self):
+        self.collection_record = None
+        self.collection_record_file = None
+        super(CollectionDirectory, self).invalidate()
+
+    def persisted(self):
+        return (self.collection_locator is not None)
+
+    def objsize(self):
+        # This is an empirically-derived heuristic to estimate the memory used
+        # to store this collection's metadata.  Calculating the memory
+        # footprint directly would be more accurate, but also more complicated.
+        return self._manifest_size * 128
+
+    def finalize(self):
+        if self.collection is not None:
+            if self.writable():
+                self.collection.save()
+            self.collection.stop_threads()
+
+
+class MagicDirectory(Directory):
+    """A special directory that logically contains the set of all extant keep locators.
+
+    When a file is referenced by lookup(), it is tested to see if it is a valid
+    keep locator to a manifest, and if so, loads the manifest contents as a
+    subdirectory of this directory with the locator as the directory name.
+    Since querying a list of all extant keep locators is impractical, only
+    collections that have already been accessed are visible to readdir().
+
+    """
+
+    README_TEXT = """
+This directory provides access to Arvados collections as subdirectories listed
+by uuid (in the form 'zzzzz-4zz18-1234567890abcde') or portable data hash (in
+the form '1234567890abcdefghijklmnopqrstuv+123').
+
+Note that this directory will appear empty until you attempt to access a
+specific collection subdirectory (such as trying to 'cd' into it), at which
+point the collection will actually be looked up on the server and the directory
+will appear if it exists.
+""".lstrip()
+
+    def __init__(self, parent_inode, inodes, api, num_retries):
+        super(MagicDirectory, self).__init__(parent_inode, inodes)
+        self.api = api
+        self.num_retries = num_retries
+
+    def __setattr__(self, name, value):
+        super(MagicDirectory, self).__setattr__(name, value)
+        # When we're assigned an inode, add a README.
+        if ((name == 'inode') and (self.inode is not None) and
+              (not self._entries)):
+            self._entries['README'] = self.inodes.add_entry(
+                StringFile(self.inode, self.README_TEXT, time.time()))
+            # If we're the root directory, add an identical by_id subdirectory.
+            if self.inode == llfuse.ROOT_INODE:
+                self._entries['by_id'] = self.inodes.add_entry(MagicDirectory(
+                        self.inode, self.inodes, self.api, self.num_retries))
+
+    def __contains__(self, k):
+        if k in self._entries:
+            return True
+
+        if not portable_data_hash_pattern.match(k) and not uuid_pattern.match(k):
+            return False
+
+        e = None
+        try:
+            e = self.inodes.add_entry(CollectionDirectory(
+                    self.inode, self.inodes, self.api, self.num_retries, k))
+
+            if e.update():
+                if k not in self._entries:
+                    self._entries[k] = e
+                else:
+                    self.inodes.del_entry(e)
+                return True
+            else:
+                self.inodes.del_entry(e)
+                return False
+        except Exception as ex:
+            # Don't reuse 'e' for the exception: it would shadow the
+            # directory entry, which still needs to be removed from the
+            # inode table on failure.
+            _logger.debug('arv-mount exception keep %s', ex)
+            if e is not None:
+                self.inodes.del_entry(e)
+            return False
+
+    def __getitem__(self, item):
+        if item in self:
+            return self._entries[item]
+        else:
+            raise KeyError("No collection with id " + item)
+
+    def clear(self, force=False):
+        pass
+
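+# Rough usage sketch (assumes a mount rooted at '/mnt/arvados'; the locator
+# below is hypothetical).  The path lookup itself is what triggers
+# MagicDirectory.__contains__ to fetch the collection on demand:
+#
+#     import os
+#     pdh = '1234567890abcdefghijklmnopqrstuv+123'
+#     os.listdir(os.path.join('/mnt/arvados', 'by_id', pdh))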
+
+class RecursiveInvalidateDirectory(Directory):
+    def invalidate(self):
+        try:
+            super(RecursiveInvalidateDirectory, self).invalidate()
+            for a in self._entries:
+                self._entries[a].invalidate()
+        except Exception:
+            _logger.exception("arv-mount: error during recursive invalidate")
+
+
+class TagsDirectory(RecursiveInvalidateDirectory):
+    """A special directory that contains as subdirectories all tags visible to the user."""
+
+    def __init__(self, parent_inode, inodes, api, num_retries, poll_time=60):
+        super(TagsDirectory, self).__init__(parent_inode, inodes)
+        self.api = api
+        self.num_retries = num_retries
+        self._poll = True
+        self._poll_time = poll_time
+
+    @use_counter
+    def update(self):
+        with llfuse.lock_released:
+            tags = self.api.links().list(
+                filters=[['link_class', '=', 'tag']],
+                select=['name'], distinct=True
+                ).execute(num_retries=self.num_retries)
+        if "items" in tags:
+            self.merge(tags['items'],
+                       lambda i: i['name'],
+                       lambda a, i: a.tag == i['name'],
+                       lambda i: TagDirectory(self.inode, self.inodes, self.api, self.num_retries, i['name'], poll=self._poll, poll_time=self._poll_time))
+
+
+class TagDirectory(Directory):
+    """A special directory that contains as subdirectories all collections visible
+    to the user that are tagged with a particular tag.
+    """
+
+    def __init__(self, parent_inode, inodes, api, num_retries, tag,
+                 poll=False, poll_time=60):
+        super(TagDirectory, self).__init__(parent_inode, inodes)
+        self.api = api
+        self.num_retries = num_retries
+        self.tag = tag
+        self._poll = poll
+        self._poll_time = poll_time
+
+    @use_counter
+    def update(self):
+        with llfuse.lock_released:
+            taggedcollections = self.api.links().list(
+                filters=[['link_class', '=', 'tag'],
+                         ['name', '=', self.tag],
+                         ['head_uuid', 'is_a', 'arvados#collection']],
+                select=['head_uuid']
+                ).execute(num_retries=self.num_retries)
+        self.merge(taggedcollections['items'],
+                   lambda i: i['head_uuid'],
+                   lambda a, i: a.collection_locator == i['head_uuid'],
+                   lambda i: CollectionDirectory(self.inode, self.inodes, self.api, self.num_retries, i['head_uuid']))
+
+
+class ProjectDirectory(Directory):
+    """A special directory that contains the contents of a project."""
+
+    def __init__(self, parent_inode, inodes, api, num_retries, project_object,
+                 poll=False, poll_time=60):
+        super(ProjectDirectory, self).__init__(parent_inode, inodes)
+        self.api = api
+        self.num_retries = num_retries
+        self.project_object = project_object
+        self.project_object_file = None
+        self.project_uuid = project_object['uuid']
+        self._poll = poll
+        self._poll_time = poll_time
+        self._updating_lock = threading.Lock()
+        self._current_user = None
+
+    def createDirectory(self, i):
+        if collection_uuid_pattern.match(i['uuid']):
+            return CollectionDirectory(self.inode, self.inodes, self.api, self.num_retries, i)
+        elif group_uuid_pattern.match(i['uuid']):
+            return ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i, self._poll, self._poll_time)
+        elif link_uuid_pattern.match(i['uuid']):
+            if i['head_kind'] == 'arvados#collection' or portable_data_hash_pattern.match(i['head_uuid']):
+                return CollectionDirectory(self.inode, self.inodes, self.api, self.num_retries, i['head_uuid'])
+            else:
+                return None
+        elif uuid_pattern.match(i['uuid']):
+            return ObjectFile(self.parent_inode, i)
+        else:
+            return None
+
+    def uuid(self):
+        return self.project_uuid
+
+    @use_counter
+    def update(self):
+        if self.project_object_file is None:
+            self.project_object_file = ObjectFile(self.inode, self.project_object)
+            self.inodes.add_entry(self.project_object_file)
+
+        def namefn(i):
+            if 'name' in i:
+                if i['name'] is None or len(i['name']) == 0:
+                    return None
+                elif collection_uuid_pattern.match(i['uuid']) or group_uuid_pattern.match(i['uuid']):
+                    # collection or subproject
+                    return i['name']
+                elif link_uuid_pattern.match(i['uuid']) and i['head_kind'] == 'arvados#collection':
+                    # name link
+                    return i['name']
+                elif 'kind' in i and i['kind'].startswith('arvados#'):
+                    # something else
+                    return "{}.{}".format(i['name'], i['kind'][8:])
+            else:
+                return None
+
+        def samefn(a, i):
+            if isinstance(a, CollectionDirectory) or isinstance(a, ProjectDirectory):
+                return a.uuid() == i['uuid']
+            elif isinstance(a, ObjectFile):
+                return a.uuid() == i['uuid'] and not a.stale()
+            return False
+
+        try:
+            with llfuse.lock_released:
+                self._updating_lock.acquire()
+                if not self.stale():
+                    return
+
+                if group_uuid_pattern.match(self.project_uuid):
+                    self.project_object = self.api.groups().get(
+                        uuid=self.project_uuid).execute(num_retries=self.num_retries)
+                elif user_uuid_pattern.match(self.project_uuid):
+                    self.project_object = self.api.users().get(
+                        uuid=self.project_uuid).execute(num_retries=self.num_retries)
+
+                contents = arvados.util.list_all(self.api.groups().contents,
+                                                 self.num_retries, uuid=self.project_uuid)
+
+            # end with llfuse.lock_released, re-acquire lock
+
+            self.merge(contents,
+                       namefn,
+                       samefn,
+                       self.createDirectory)
+        finally:
+            self._updating_lock.release()
+
+    @use_counter
+    @check_update
+    def __getitem__(self, item):
+        if item == '.arvados#project':
+            return self.project_object_file
+        else:
+            return super(ProjectDirectory, self).__getitem__(item)
+
+    def __contains__(self, k):
+        if k == '.arvados#project':
+            return True
+        else:
+            return super(ProjectDirectory, self).__contains__(k)
+
+    @use_counter
+    @check_update
+    def writable(self):
+        with llfuse.lock_released:
+            if not self._current_user:
+                self._current_user = self.api.users().current().execute(num_retries=self.num_retries)
+            return self._current_user["uuid"] in self.project_object["writable_by"]
+
+    def persisted(self):
+        return True
+
+    @use_counter
+    @check_update
+    def mkdir(self, name):
+        try:
+            with llfuse.lock_released:
+                self.api.collections().create(body={"owner_uuid": self.project_uuid,
+                                                    "name": name,
+                                                    "manifest_text": ""}).execute(num_retries=self.num_retries)
+            self.invalidate()
+        except apiclient_errors.Error as error:
+            _logger.error(error)
+            raise llfuse.FUSEError(errno.EEXIST)
+
+    @use_counter
+    @check_update
+    def rmdir(self, name):
+        if name not in self:
+            raise llfuse.FUSEError(errno.ENOENT)
+        if not isinstance(self[name], CollectionDirectory):
+            raise llfuse.FUSEError(errno.EPERM)
+        if len(self[name]) > 0:
+            raise llfuse.FUSEError(errno.ENOTEMPTY)
+        with llfuse.lock_released:
+            self.api.collections().delete(uuid=self[name].uuid()).execute(num_retries=self.num_retries)
+        self.invalidate()
+
+    @use_counter
+    @check_update
+    def rename(self, name_old, name_new, src):
+        if not isinstance(src, ProjectDirectory):
+            raise llfuse.FUSEError(errno.EPERM)
+
+        ent = src[name_old]
+
+        if not isinstance(ent, CollectionDirectory):
+            raise llfuse.FUSEError(errno.EPERM)
+
+        if name_new in self:
+            # POSIX semantics for replacing one directory with another is
+            # tricky (the target directory must be empty, the operation must be
+            # atomic which isn't possible with the Arvados API as of this
+            # writing) so don't support that.
+            raise llfuse.FUSEError(errno.EPERM)
+
+        with llfuse.lock_released:
+            self.api.collections().update(uuid=ent.uuid(),
+                                          body={"owner_uuid": self.uuid(),
+                                                "name": name_new}).execute(num_retries=self.num_retries)
+
+        # Actually move the entry from source directory to this directory.
+        del src._entries[name_old]
+        self._entries[name_new] = ent
+        self.inodes.invalidate_entry(src.inode, name_old.encode(self.inodes.encoding))
+
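+# Sketch of the rename path from a caller's perspective (mount point and
+# project names are hypothetical): moving a collection directory between two
+# project directories issues the collections().update() above, changing the
+# collection's owner_uuid.
+#
+#     import os
+#     os.rename('/mnt/arvados/home/Project A/my collection',
+#               '/mnt/arvados/home/Project B/my collection')
+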
+
+class SharedDirectory(Directory):
+    """A special directory that represents users or groups who have shared projects with me."""
+
+    def __init__(self, parent_inode, inodes, api, num_retries, exclude,
+                 poll=False, poll_time=60):
+        super(SharedDirectory, self).__init__(parent_inode, inodes)
+        self.api = api
+        self.num_retries = num_retries
+        self.current_user = api.users().current().execute(num_retries=num_retries)
+        self._poll = True
+        self._poll_time = poll_time
+
+    @use_counter
+    def update(self):
+        with llfuse.lock_released:
+            all_projects = arvados.util.list_all(
+                self.api.groups().list, self.num_retries,
+                filters=[['group_class','=','project']])
+            objects = {}
+            for ob in all_projects:
+                objects[ob['uuid']] = ob
+
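+            # A project is a "root" here if its owner is neither the current
+            # user nor another project in the listing, i.e. it was shared
+            # into our view from outside our own tree.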
+            roots = []
+            root_owners = {}
+            for ob in all_projects:
+                if ob['owner_uuid'] != self.current_user['uuid'] and ob['owner_uuid'] not in objects:
+                    roots.append(ob)
+                    root_owners[ob['owner_uuid']] = True
+
+            lusers = arvados.util.list_all(
+                self.api.users().list, self.num_retries,
+                filters=[['uuid','in', list(root_owners)]])
+            lgroups = arvados.util.list_all(
+                self.api.groups().list, self.num_retries,
+                filters=[['uuid','in', list(root_owners)]])
+
+            for l in lusers:
+                objects[l["uuid"]] = l
+            for l in lgroups:
+                objects[l["uuid"]] = l
+
+            contents = {}
+            for r in root_owners:
+                if r in objects:
+                    obr = objects[r]
+                    if obr.get("name"):
+                        contents[obr["name"]] = obr
+                    #elif obr.get("username"):
+                    #    contents[obr["username"]] = obr
+                    elif "first_name" in obr:
+                        contents[u"{} {}".format(obr["first_name"], obr["last_name"])] = obr
+
+            for r in roots:
+                if r['owner_uuid'] not in objects:
+                    contents[r['name']] = r
+
+        # end with llfuse.lock_released, re-acquire lock
+
+        try:
+            self.merge(contents.items(),
+                       lambda i: i[0],
+                       lambda a, i: a.uuid() == i[1]['uuid'],
+                       lambda i: ProjectDirectory(self.inode, self.inodes, self.api, self.num_retries, i[1], poll=self._poll, poll_time=self._poll_time))
+        except Exception:
+            _logger.exception("arv-mount: error during merge of shared projects")
diff --git a/services/fuse/arvados_fuse/fusefile.py b/services/fuse/arvados_fuse/fusefile.py
new file mode 100644 (file)
index 0000000..4d472cf
--- /dev/null
@@ -0,0 +1,101 @@
+import logging
+import re
+import json
+import llfuse
+
+from fresh import FreshBase, convertTime
+
+_logger = logging.getLogger('arvados.arvados_fuse')
+
+class File(FreshBase):
+    """Base for file objects."""
+
+    def __init__(self, parent_inode, _mtime=0):
+        super(File, self).__init__()
+        self.inode = None
+        self.parent_inode = parent_inode
+        self._mtime = _mtime
+
+    def size(self):
+        return 0
+
+    def readfrom(self, off, size, num_retries=0):
+        return ''
+
+    def writeto(self, off, buf, num_retries=0):
+        raise Exception("Not writable")
+
+    def mtime(self):
+        return self._mtime
+
+    def clear(self, force=False):
+        return True
+
+    def writable(self):
+        return False
+
+    def flush(self):
+        pass
+
+
+class FuseArvadosFile(File):
+    """Wraps a ArvadosFile."""
+
+    def __init__(self, parent_inode, arvfile, _mtime):
+        super(FuseArvadosFile, self).__init__(parent_inode, _mtime)
+        self.arvfile = arvfile
+
+    def size(self):
+        with llfuse.lock_released:
+            return self.arvfile.size()
+
+    def readfrom(self, off, size, num_retries=0):
+        with llfuse.lock_released:
+            return self.arvfile.readfrom(off, size, num_retries, exact=True)
+
+    def writeto(self, off, buf, num_retries=0):
+        with llfuse.lock_released:
+            return self.arvfile.writeto(off, buf, num_retries)
+
+    def stale(self):
+        return False
+
+    def writable(self):
+        return self.arvfile.writable()
+
+    def flush(self):
+        with llfuse.lock_released:
+            if self.writable():
+                self.arvfile.parent.root_collection().save()
+
+
+class StringFile(File):
+    """Wrap a simple string as a file"""
+    def __init__(self, parent_inode, contents, _mtime):
+        super(StringFile, self).__init__(parent_inode, _mtime)
+        self.contents = contents
+
+    def size(self):
+        return len(self.contents)
+
+    def readfrom(self, off, size, num_retries=0):
+        return self.contents[off:(off+size)]
+
+
+class ObjectFile(StringFile):
+    """Wrap a dict as a serialized json object."""
+
+    def __init__(self, parent_inode, obj):
+        super(ObjectFile, self).__init__(parent_inode, "", 0)
+        self.object_uuid = obj['uuid']
+        self.update(obj)
+
+    def uuid(self):
+        return self.object_uuid
+
+    def update(self, obj=None):
+        if obj is None:
+            return
+        self._mtime = convertTime(obj['modified_at']) if 'modified_at' in obj else 0
+        self.contents = json.dumps(obj, indent=4, sort_keys=True) + "\n"
+
+    def persisted(self):
+        return True
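+
+
+# Minimal usage sketch (not part of the module's API surface): an ObjectFile
+# is what backs magic files like '.arvados#collection'; reading it yields the
+# record as pretty-printed JSON.  The inode and record below are hypothetical.
+#
+#     f = ObjectFile(1, {'uuid': 'zzzzz-4zz18-1234567890abcde'})
+#     f.readfrom(0, f.size())   # -> '{\n    "uuid": ...\n}\n'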
index b540efe82c63b59640e5c6c4d10a44e6de004606..7f9c916c456f49e76744f78ba06b18af2eb1ed1b 100755 (executable)
@@ -12,6 +12,7 @@ import time
 import arvados.commands._util as arv_cmd
 from arvados_fuse import *
 from arvados.safeapi import ThreadSafeApiCache
+import arvados.keep
 
 logger = logging.getLogger('arvados.arv-mount')
 
@@ -45,6 +46,13 @@ with "--".
     parser.add_argument('--logfile', help="""Write debug logs and errors to the specified file (default stderr).""")
     parser.add_argument('--foreground', action='store_true', help="""Run in foreground (default is to daemonize unless --exec specified)""", default=False)
     parser.add_argument('--encoding', type=str, help="Character encoding to use for filesystem, default is utf-8 (see Python codec registry for list of available encodings)", default="utf-8")
+
+    parser.add_argument('--file-cache', type=int, help="File data cache size, in bytes (default 256MiB)", default=256*1024*1024)
+    parser.add_argument('--directory-cache', type=int, help="Directory data cache size, in bytes (default 128MiB)", default=128*1024*1024)
+
+    parser.add_argument('--read-only', action='store_false', help="Mount will be read-only (default)", dest="enable_write", default=False)
+    parser.add_argument('--read-write', action='store_true', help="Mount will be read-write", dest="enable_write", default=False)
+
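+    # Example invocation (illustrative): a writable mount with a 512 MiB file
+    # cache might be started as:
+    #   arv-mount --read-write --file-cache 536870912 /mnt/arvados
+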
     parser.add_argument('--exec', type=str, nargs=argparse.REMAINDER,
                         dest="exec_args", metavar=('command', 'args', '...', '--'),
                         help="""Mount, run a command, then unmount and exit""")
@@ -79,10 +87,17 @@ with "--".
         arvados.logger.setLevel(logging.DEBUG)
         logger.debug("arv-mount debugging enabled")
 
+    logger.info("enable write is %s", args.enable_write)
+
     try:
         # Create the request handler
-        operations = Operations(os.getuid(), os.getgid(), args.encoding)
-        api = ThreadSafeApiCache(arvados.config.settings())
+        operations = Operations(os.getuid(),
+                                os.getgid(),
+                                encoding=args.encoding,
+                                inode_cache=InodeCache(cap=args.directory_cache),
+                                enable_write=args.enable_write)
+        api = ThreadSafeApiCache(apiconfig=arvados.config.settings(),
+                                 keep_params={"block_cache": arvados.keep.KeepBlockCache(args.file_cache)})
 
         usr = api.users().current().execute(num_retries=args.retries)
         now = time.time()
@@ -112,7 +127,7 @@ with "--".
         if dir_class is not None:
             operations.inodes.add_entry(dir_class(*dir_args))
         else:
-            e = operations.inodes.add_entry(Directory(llfuse.ROOT_INODE))
+            e = operations.inodes.add_entry(Directory(llfuse.ROOT_INODE, operations.inodes))
             dir_args[0] = e.inode
 
             e._entries['by_id'] = operations.inodes.add_entry(MagicDirectory(*dir_args))
@@ -147,10 +162,16 @@ From here, the following directories are available:
     opts = [optname for optname in ['allow_other', 'debug']
             if getattr(args, optname)]
 
+    # Increase default read/write size from 4KiB to 128KiB
+    opts += ["big_writes", "max_read=131072"]
+
     if args.exec_args:
         # Initialize the fuse connection
         llfuse.init(operations, args.mountpoint, opts)
 
+        # Subscribe to change events from API server
+        operations.listen_for_events(api)
+
         t = threading.Thread(None, lambda: llfuse.main())
         t.start()
 
@@ -182,12 +203,19 @@ From here, the following directories are available:
                 pass
         finally:
             subprocess.call(["fusermount", "-u", "-z", args.mountpoint])
+            operations.destroy()
 
         exit(rc)
     else:
         try:
             llfuse.init(operations, args.mountpoint, opts)
+
+            # Subscribe to change events from API server
+            operations.listen_for_events(api)
+
             llfuse.main()
         except Exception as e:
             logger.exception('arv-mount: exception during mount')
             exit(getattr(e, 'errno', 1))
+        finally:
+            operations.destroy()
index a7ae1c2e0694682857ca60b301e8ab4fb8a8d6fb..3833493598cf05e288dc31f0153732dbccdca907 100644 (file)
@@ -29,13 +29,13 @@ setup(name='arvados_fuse',
         'bin/arv-mount'
         ],
       install_requires=[
-        'arvados-python-client>=0.1.20150303143450',
-        'llfuse',
+        'arvados-python-client >= 0.1.20150625175218',
+        'llfuse>=0.40',
         'python-daemon',
         'ciso8601'
         ],
       test_suite='tests',
-      tests_require=['PyYAML'],
+      tests_require=['mock>=1.0', 'PyYAML'],
       zip_safe=False,
       cmdclass={'egg_info': tagger},
       )
diff --git a/services/fuse/tests/fstest.py b/services/fuse/tests/fstest.py
new file mode 100644 (file)
index 0000000..cf081b7
--- /dev/null
@@ -0,0 +1,133 @@
+from multiprocessing import Process
+import os
+import subprocess
+import sys
+import prof
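+
+# Stress test for a FUSE mount: runs concurrent small-file, large-file, and
+# 'ls' workloads in the current directory (expected to be the mount point)
+# and reports timings via prof.CountTime.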
+
+def fn(n):
+    return "file%i" % n
+
+def createfiles(d, n):
+    for j in xrange(1, 5):
+        print "Starting small file %s %i, %i" % (d, n, j)
+        if d:
+            os.mkdir(d)
+            ld = os.listdir('.')
+            if d not in ld:
+                print "ERROR %s missing" % d
+            os.chdir(d)
+
+        for i in xrange(n, n+10):
+            with open(fn(i), "w") as f:
+                f.write(fn(i))
+
+        ld = os.listdir('.')
+        for i in xrange(n, n+10):
+            if fn(i) not in ld:
+                print "ERROR %s missing" % fn(i)
+
+        for i in xrange(n, n+10):
+            with open(fn(i), "r") as f:
+                if f.read() != fn(i):
+                    print "ERROR %s doesn't have expected contents" % fn(i)
+
+        for i in xrange(n, n+10):
+            os.remove(fn(i))
+
+        ld = os.listdir('.')
+        for i in xrange(n, n+10):
+            if fn(i) in ld:
+                print "ERROR %s should have been removed" % fn(i)
+
+        if d:
+            os.chdir('..')
+            os.rmdir(d)
+            ld = os.listdir('.')
+            if d in ld:
+                print "ERROR %s should have been removed" % d
+
+
+def createbigfile(d, n):
+    for j in xrange(1, 5):
+        print "Starting big file %s %i, %i" % (d, n, j)
+        i = n
+        if d:
+            os.mkdir(d)
+            ld = os.listdir('.')
+            if d not in ld:
+                print "ERROR %s missing" % d
+            os.chdir(d)
+
+        with open(fn(i), "w") as f:
+            for j in xrange(0, 1000):
+                f.write((str(j) + fn(i)) * 10000)
+
+        ld = os.listdir('.')
+        if fn(i) not in ld:
+            print "ERROR %s missing" % fn(i)
+
+        with open(fn(i), "r") as f:
+            for j in xrange(0, 1000):
+                expect = (str(j) + fn(i)) * 10000
+                if f.read(len(expect)) != expect:
+                    print "ERROR %s doesn't have expected contents" % fn(i)
+
+        os.remove(fn(i))
+
+        ld = os.listdir('.')
+        if fn(i) in ld:
+            print "ERROR %s should have been removed" % fn(i)
+
+        if d:
+            os.chdir('..')
+            os.rmdir(d)
+            ld = os.listdir('.')
+            if d in ld:
+                print "ERROR %s should have been removed" % d
+
+def do_ls():
+    with open("/dev/null", "w") as nul:
+        for j in xrange(1, 50):
+            subprocess.call(["ls", "-l"], stdout=nul, stderr=nul)
+
+def runit(target, indir):
+    procs = []
+    for n in xrange(0, 20):
+        if indir:
+            p = Process(target=target, args=("dir%i" % n, n*10,))
+        else:
+            p = Process(target=target, args=("", n*10,))
+        p.start()
+        procs.append(p)
+
+    p = Process(target=do_ls, args=())
+    p.start()
+    procs.append(p)
+
+    for p in procs:
+        p.join()
+
+    if os.listdir('.'):
+        print "ERROR there are left over files in the directory"
+
+
+if __name__ == '__main__':
+    if os.listdir('.'):
+        print "ERROR starting directory is not empty"
+        sys.exit(1)
+
+    print "Single directory small files"
+    with prof.CountTime():
+        runit(createfiles, False)
+
+    print "Separate directories small files"
+    with prof.CountTime():
+        runit(createfiles, True)
+
+    print "Single directory large files"
+    with prof.CountTime():
+        runit(createbigfile, False)
+
+    print "Separate directories large files"
+    with prof.CountTime():
+        runit(createbigfile, True)
diff --git a/services/fuse/tests/mount_test_base.py b/services/fuse/tests/mount_test_base.py
new file mode 100644 (file)
index 0000000..3b7cbaa
--- /dev/null
@@ -0,0 +1,72 @@
+import arvados
+import arvados.safeapi
+import arvados_fuse as fuse
+import llfuse
+import os
+import shutil
+import subprocess
+import sys
+import tempfile
+import threading
+import time
+import unittest
+import logging
+import multiprocessing
+import run_test_server
+
+logger = logging.getLogger('arvados.arv-mount')
+
+class MountTestBase(unittest.TestCase):
+    def setUp(self, api=None):
+        # The underlying C implementation of open() makes a fstat() syscall
+        # with the GIL still held.  When the GETATTR message comes back to
+        # llfuse (which in these tests is in the same interpreter process) it
+        # can't acquire the GIL, so it can't service the fstat() call, so it
+        # deadlocks.  The workaround is to run some of our test code in a
+        # separate process.  Fortunately the multiprocessing module makes this
+        # relatively easy.
+        self.pool = multiprocessing.Pool(1)
+
+        self.keeptmp = tempfile.mkdtemp()
+        os.environ['KEEP_LOCAL_STORE'] = self.keeptmp
+        self.mounttmp = tempfile.mkdtemp()
+        run_test_server.run()
+        run_test_server.authorize_with("admin")
+        self.api = api if api else arvados.safeapi.ThreadSafeApiCache(arvados.config.settings())
+
+    def make_mount(self, root_class, **root_kwargs):
+        self.operations = fuse.Operations(os.getuid(), os.getgid(), enable_write=True)
+        self.operations.inodes.add_entry(root_class(
+            llfuse.ROOT_INODE, self.operations.inodes, self.api, 0, **root_kwargs))
+        llfuse.init(self.operations, self.mounttmp, [])
+        threading.Thread(None, llfuse.main).start()
+        # wait until the driver is finished initializing
+        self.operations.initlock.wait()
+        return self.operations.inodes[llfuse.ROOT_INODE]
+
+    def tearDown(self):
+        self.pool.terminate()
+        self.pool.join()
+        del self.pool
+
+        # llfuse.close is buggy, so use fusermount instead.
+        #llfuse.close(unmount=True)
+
+        count = 0
+        success = 1
+        while count < 9 and success != 0:
+            success = subprocess.call(["fusermount", "-u", self.mounttmp])
+            time.sleep(0.1)
+            count += 1
+
+        self.operations.destroy()
+
+        os.rmdir(self.mounttmp)
+        shutil.rmtree(self.keeptmp)
+        run_test_server.reset()
+
+    def assertDirContents(self, subdir, expect_content):
+        path = self.mounttmp
+        if subdir:
+            path = os.path.join(path, subdir)
+        self.assertEqual(sorted(expect_content), sorted(llfuse.listdir(path)))
diff --git a/services/fuse/tests/performance/__init__.py b/services/fuse/tests/performance/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/services/fuse/tests/performance/performance_profiler.py b/services/fuse/tests/performance/performance_profiler.py
new file mode 120000 (symlink)
index 0000000..01a6805
--- /dev/null
@@ -0,0 +1 @@
+../../../../sdk/python/tests/performance/performance_profiler.py
\ No newline at end of file
diff --git a/services/fuse/tests/performance/test_collection_performance.py b/services/fuse/tests/performance/test_collection_performance.py
new file mode 100644 (file)
index 0000000..c4eadca
--- /dev/null
@@ -0,0 +1,477 @@
+import arvados
+import arvados_fuse as fuse
+import llfuse
+import logging
+import os
+import sys
+import unittest
+from .. import run_test_server
+from ..mount_test_base import MountTestBase
+
+logger = logging.getLogger('arvados.arv-mount')
+
+from performance_profiler import profiled
+
+def fuse_createCollectionWithMultipleBlocks(mounttmp, streams=1, files_per_stream=1, data='x'):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            self.createCollectionWithMultipleBlocks()
+
+        @profiled
+        def createCollectionWithMultipleBlocks(self):
+            for i in range(0, streams):
+                os.mkdir(os.path.join(mounttmp, "./stream" + str(i)))
+
+                # Create files
+                for j in range(0, files_per_stream):
+                    with open(os.path.join(mounttmp, "./stream" + str(i), "file" + str(j) +".txt"), "w") as f:
+                        f.write(data)
+
+    Test().runTest()
+
+def fuse_readContentsFromCollectionWithMultipleBlocks(mounttmp, streams=1, files_per_stream=1, data='x'):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            self.readContentsFromCollectionWithMultipleBlocks()
+
+        @profiled
+        def readContentsFromCollectionWithMultipleBlocks(self):
+            for i in range(0, streams):
+                d1 = llfuse.listdir(os.path.join(mounttmp, 'stream'+str(i)))
+                for j in range(0, files_per_stream):
+                    with open(os.path.join(mounttmp, 'stream'+str(i), 'file'+str(j)+'.txt')) as f:
+                        self.assertEqual(data, f.read())
+
+    Test().runTest()
+
+def fuse_moveFileFromCollectionWithMultipleBlocks(mounttmp, stream, filename):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            self.moveFileFromCollectionWithMultipleBlocks()
+
+        @profiled
+        def moveFileFromCollectionWithMultipleBlocks(self):
+            d1 = llfuse.listdir(os.path.join(mounttmp, stream))
+            self.assertIn(filename, d1)
+
+            os.rename(os.path.join(mounttmp, stream, filename), os.path.join(mounttmp, 'moved_from_'+stream+'_'+filename))
+
+            d1 = llfuse.listdir(os.path.join(mounttmp))
+            self.assertIn('moved_from_'+stream+'_'+filename, d1)
+
+            d1 = llfuse.listdir(os.path.join(mounttmp, stream))
+            self.assertNotIn(filename, d1)
+
+    Test().runTest()
+
+def fuse_deleteFileFromCollectionWithMultipleBlocks(mounttmp, stream, filename):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            self.deleteFileFromCollectionWithMultipleBlocks()
+
+        @profiled
+        def deleteFileFromCollectionWithMultipleBlocks(self):
+            os.remove(os.path.join(mounttmp, stream, filename))
+
+    Test().runTest()
+
+# Create a collection with 2 streams, 3 files_per_stream, 2 blocks_per_file, 2**26 bytes_per_block
+class CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile(MountTestBase):
+    def setUp(self):
+        super(CreateCollectionWithMultipleBlocksAndMoveAndDeleteFile, self).setUp()
+
+    def test_CreateCollectionWithManyBlocksAndMoveAndDeleteFile(self):
+        collection = arvados.collection.Collection(api_client=self.api)
+        collection.save_new()
+
+        m = self.make_mount(fuse.CollectionDirectory)
+        with llfuse.lock:
+            m.new_collection(collection.api_response(), collection)
+        self.assertTrue(m.writable())
+
+        streams = 2
+        files_per_stream = 3
+        blocks_per_file = 2
+        bytes_per_block = 2**26
+
+        data = 'x' * blocks_per_file * bytes_per_block
+
+        self.pool.apply(fuse_createCollectionWithMultipleBlocks, (self.mounttmp, streams, files_per_stream, data,))
+
+        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+
+        for i in range(0, streams):
+            self.assertIn('./stream' + str(i), collection2["manifest_text"])
+
+        for i in range(0, files_per_stream):
+            self.assertIn('file' + str(i) + '.txt', collection2["manifest_text"])
+
+        # Read file contents
+        self.pool.apply(fuse_readContentsFromCollectionWithMultipleBlocks, (self.mounttmp, streams, files_per_stream, data,))
+
+        # Move file0.txt out of the streams into .
+        for i in range(0, streams):
+            self.pool.apply(fuse_moveFileFromCollectionWithMultipleBlocks, (self.mounttmp, 'stream'+str(i), 'file0.txt',))
+
+        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+
+        manifest_streams = collection2['manifest_text'].split('\n')
+        self.assertEqual(4, len(manifest_streams))
+
+        for i in range(0, streams):
+            self.assertIn('file0.txt', manifest_streams[0])
+
+        for i in range(0, streams):
+            self.assertNotIn('file0.txt', manifest_streams[i+1])
+
+        for i in range(0, streams):
+            for j in range(1, files_per_stream):
+                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
+
+        # Delete 'file1.txt' from all the streams
+        for i in range(0, streams):
+            self.pool.apply(fuse_deleteFileFromCollectionWithMultipleBlocks, (self.mounttmp, 'stream'+str(i), 'file1.txt'))
+
+        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+
+        manifest_streams = collection2['manifest_text'].split('\n')
+        self.assertEqual(4, len(manifest_streams))
+
+        for i in range(0, streams):
+            self.assertIn('file0.txt', manifest_streams[0])
+
+        self.assertNotIn('file1.txt', collection2['manifest_text'])
+
+        for i in range(0, streams):
+            for j in range(2, files_per_stream):
+                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
+
+
+def fuse_createCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, data='x'):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            self.createCollectionWithManyFiles()
+
+        @profiled
+        def createCollectionWithManyFiles(self):
+            for i in range(0, streams):
+                os.mkdir(os.path.join(mounttmp, "./stream" + str(i)))
+
+                # Create files
+                for j in range(0, files_per_stream):
+                    with open(os.path.join(mounttmp, "./stream" + str(i), "file" + str(j) +".txt"), "w") as f:
+                        f.write(data)
+
+    Test().runTest()
+
+def fuse_readContentsFromCollectionWithManyFiles(mounttmp, streams=1, files_per_stream=1, data='x'):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            self.readContentsFromCollectionWithManyFiles()
+
+        @profiled
+        def readContentsFromCollectionWithManyFiles(self):
+            for i in range(0, streams):
+                d1 = llfuse.listdir(os.path.join(mounttmp, 'stream'+str(i)))
+                for j in range(0, files_per_stream):
+                    with open(os.path.join(mounttmp, 'stream'+str(i), 'file'+str(j)+'.txt')) as f:
+                        self.assertEqual(data, f.read())
+
+    Test().runTest()
+
+def fuse_moveFileFromCollectionWithManyFiles(mounttmp, stream, filename):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            self.moveFileFromCollectionWithManyFiles()
+
+        @profiled
+        def moveFileFromCollectionWithManyFiles(self):
+            d1 = llfuse.listdir(os.path.join(mounttmp, stream))
+            self.assertIn(filename, d1)
+
+            os.rename(os.path.join(mounttmp, stream, filename), os.path.join(mounttmp, 'moved_from_'+stream+'_'+filename))
+
+            d1 = llfuse.listdir(os.path.join(mounttmp))
+            self.assertIn('moved_from_'+stream+'_'+filename, d1)
+
+            d1 = llfuse.listdir(os.path.join(mounttmp, stream))
+            self.assertNotIn(filename, d1)
+
+    Test().runTest()
+
+def fuse_deleteFileFromCollectionWithManyFiles(mounttmp, stream, filename):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            self.deleteFileFromCollectionWithManyFiles()
+
+        @profiled
+        def deleteFileFromCollectionWithManyFiles(self):
+            os.remove(os.path.join(mounttmp, stream, filename))
+
+    Test().runTest()
+
+# Create a collection with two streams, each with 200 files
+class CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
+    def setUp(self):
+        super(CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()
+
+    def test_CreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
+        collection = arvados.collection.Collection(api_client=self.api)
+        collection.save_new()
+
+        m = self.make_mount(fuse.CollectionDirectory)
+        with llfuse.lock:
+            m.new_collection(collection.api_response(), collection)
+        self.assertTrue(m.writable())
+
+        streams = 2
+        files_per_stream = 200
+        data = 'x'
+
+        self.pool.apply(fuse_createCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, data,))
+
+        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+
+        for i in range(0, streams):
+            self.assertIn('./stream' + str(i), collection2["manifest_text"])
+
+        for i in range(0, files_per_stream):
+            self.assertIn('file' + str(i) + '.txt', collection2["manifest_text"])
+
+        # Read file contents
+        self.pool.apply(fuse_readContentsFromCollectionWithManyFiles, (self.mounttmp, streams, files_per_stream, data,))
+
+        # Move file0.txt out of the streams into .
+        for i in range(0, streams):
+            self.pool.apply(fuse_moveFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file0.txt',))
+
+        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+
+        manifest_streams = collection2['manifest_text'].split('\n')
+        self.assertEqual(4, len(manifest_streams))
+
+        for i in range(0, streams):
+            self.assertIn('file0.txt', manifest_streams[0])
+
+        for i in range(0, streams):
+            self.assertNotIn('file0.txt', manifest_streams[i+1])
+
+        for i in range(0, streams):
+            for j in range(1, files_per_stream):
+                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
+
+        # Delete 'file1.txt' from all the streams
+        for i in range(0, streams):
+            self.pool.apply(fuse_deleteFileFromCollectionWithManyFiles, (self.mounttmp, 'stream'+str(i), 'file1.txt'))
+
+        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+
+        manifest_streams = collection2['manifest_text'].split('\n')
+        self.assertEqual(4, len(manifest_streams))
+
+        for i in range(0, streams):
+            self.assertIn('file0.txt', manifest_streams[0])
+
+        self.assertNotIn('file1.txt', collection2['manifest_text'])
+
+        for i in range(0, streams):
+            for j in range(2, files_per_stream):
+                self.assertIn('file' + str(j) + '.txt', manifest_streams[i+1])
+
+
+def magicDirTest_MoveFileFromCollection(mounttmp, collection1, collection2, stream, filename):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            self.magicDirTest_moveFileFromCollection()
+
+        @profiled
+        def magicDirTest_moveFileFromCollection(self):
+            os.rename(os.path.join(mounttmp, collection1, filename), os.path.join(mounttmp, collection2, filename))
+
+    Test().runTest()
+
+def magicDirTest_RemoveFileFromCollection(mounttmp, collection1, stream, filename):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            self.magicDirTest_removeFileFromCollection()
+
+        @profiled
+        def magicDirTest_removeFileFromCollection(self):
+            os.remove(os.path.join(mounttmp, collection1, filename))
+
+    Test().runTest()
+
+class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile(MountTestBase):
+    def setUp(self):
+        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveAndDeleteFile, self).setUp()
+
+    @profiled
+    def magicDirTest_createCollectionWithManyFiles(self, streams=0, files_per_stream=0, data='x'):
+        # Create collection
+        collection = arvados.collection.Collection(api_client=self.api)
+        for j in range(0, files_per_stream):
+            with collection.open("file"+str(j)+".txt", "w") as f:
+                f.write(data)
+        collection.save_new()
+        return collection
+
+    @profiled
+    def magicDirTest_readCollectionContents(self, collection, streams=1, files_per_stream=1, data='x'):
+        mount_ls = os.listdir(os.path.join(self.mounttmp, collection))
+
+        files = {}
+        for j in range(0, files_per_stream):
+            files[os.path.join(self.mounttmp, collection, 'file'+str(j)+'.txt')] = data
+
+        for k, v in files.items():
+            # keys are already absolute paths under the mount point
+            with open(k) as f:
+                self.assertEqual(v, f.read())
+
+    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAndDeleteFile(self):
+        streams = 2
+        files_per_stream = 200
+        data = 'x'
+
+        collection1 = self.magicDirTest_createCollectionWithManyFiles()
+        # Create collection with multiple files
+        collection2 = self.magicDirTest_createCollectionWithManyFiles(streams, files_per_stream, data)
+
+        # Mount FuseMagicDir
+        self.make_mount(fuse.MagicDirectory)
+
+        self.magicDirTest_readCollectionContents(collection2.manifest_locator(), streams, files_per_stream, data)
+
+        # Move file0.txt out of the collection2 into collection1
+        self.pool.apply(magicDirTest_MoveFileFromCollection, (self.mounttmp, collection2.manifest_locator(),
+              collection1.manifest_locator(), 'stream0', 'file0.txt',))
+        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
+        self.assertFalse('file0.txt' in updated_collection['manifest_text'])
+        self.assertTrue('file1.txt' in updated_collection['manifest_text'])
+
+        # Delete file1.txt from collection2
+        self.pool.apply(magicDirTest_RemoveFileFromCollection, (self.mounttmp, collection2.manifest_locator(), 'stream0', 'file1.txt',))
+        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
+        self.assertFalse('file1.txt' in updated_collection['manifest_text'])
+        self.assertTrue('file2.txt' in updated_collection['manifest_text'])
+
+
+def magicDirTest_MoveAllFilesFromCollection(mounttmp, from_collection, to_collection, stream, files_per_stream):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            self.magicDirTest_moveAllFilesFromCollection()
+
+        @profiled
+        def magicDirTest_moveAllFilesFromCollection(self):
+            for j in range(0, files_per_stream):
+                os.rename(os.path.join(mounttmp, from_collection, 'file'+str(j)+'.txt'), os.path.join(mounttmp, to_collection, 'file'+str(j)+'.txt'))
+
+    Test().runTest()
+
+class UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(MountTestBase):
+    def setUp(self):
+        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveAllFilesIntoAnother, self).setUp()
+
+    @profiled
+    def magicDirTestMoveAllFiles_createCollectionWithManyFiles(self, streams=0, files_per_stream=0,
+            blocks_per_file=0, bytes_per_block=0, data='x'):
+        # Create collection
+        collection = arvados.collection.Collection(api_client=self.api)
+        for j in range(0, files_per_stream):
+            with collection.open("file"+str(j)+".txt", "w") as f:
+                f.write(data)
+        collection.save_new()
+        return collection
+
+    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveAllFilesIntoAnother(self):
+        streams = 2
+        files_per_stream = 200
+        data = 'x'
+
+        collection1 = self.magicDirTestMoveAllFiles_createCollectionWithManyFiles()
+        # Create collection with multiple files
+        collection2 = self.magicDirTestMoveAllFiles_createCollectionWithManyFiles(streams, files_per_stream, data)
+
+        # Mount FuseMagicDir
+        self.make_mount(fuse.MagicDirectory)
+
+        # Move all files from collection2 into collection1
+        self.pool.apply(magicDirTest_MoveAllFilesFromCollection, (self.mounttmp, collection2.manifest_locator(),
+                  collection1.manifest_locator(), 'stream0', files_per_stream,))
+
+        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
+        file_names = ["file%i.txt" % i for i in range(0, files_per_stream)]
+        for name in file_names:
+            self.assertFalse(name in updated_collection['manifest_text'])
+
+        updated_collection = self.api.collections().get(uuid=collection1.manifest_locator()).execute()
+        for name in file_names:
+            self.assertTrue(name in updated_collection['manifest_text'])
+
+
+# Move one file at a time from one collection into another
+class UsingMagicDir_CreateCollectionWithManyFilesAndMoveEachFileIntoAnother(MountTestBase):
+    def setUp(self):
+        super(UsingMagicDir_CreateCollectionWithManyFilesAndMoveEachFileIntoAnother, self).setUp()
+
+    @profiled
+    def magicDirTestMoveFiles_createCollectionWithManyFiles(self, streams=0, files_per_stream=0, data='x'):
+        # Create collection
+        collection = arvados.collection.Collection(api_client=self.api)
+        for j in range(0, files_per_stream):
+            with collection.open("file"+str(j)+".txt", "w") as f:
+                f.write(data)
+        collection.save_new()
+        return collection
+
+    def magicDirTestMoveFiles_oneEachIntoAnother(self, from_collection, to_collection, files_per_stream):
+        for j in range(0, files_per_stream):
+            self.pool.apply(magicDirTest_MoveFileFromCollection, (self.mounttmp, from_collection.manifest_locator(),
+                  to_collection.manifest_locator(), 'stream0', 'file'+str(j)+'.txt',))
+
+    def test_UsingMagicDirCreateCollectionWithManyFilesAndMoveEachFileIntoAnother(self):
+        streams = 2
+        files_per_stream = 200
+        data = 'x'
+
+        collection1 = self.magicDirTestMoveFiles_createCollectionWithManyFiles()
+        # Create collection with multiple files
+        collection2 = self.magicDirTestMoveFiles_createCollectionWithManyFiles(streams, files_per_stream, data)
+
+        # Mount FuseMagicDir
+        self.make_mount(fuse.MagicDirectory)
+
+        # Move all files from collection2 into collection1
+        self.magicDirTestMoveFiles_oneEachIntoAnother(collection2, collection1, files_per_stream)
+
+        updated_collection = self.api.collections().get(uuid=collection2.manifest_locator()).execute()
+        file_names = ["file%i.txt" % i for i in range(0, files_per_stream)]
+        for name in file_names:
+            self.assertFalse(name in updated_collection['manifest_text'])
+
+        updated_collection = self.api.collections().get(uuid=collection1.manifest_locator()).execute()
+        for name in file_names:
+            self.assertTrue(name in updated_collection['manifest_text'])
+
+class FuseListLargeProjectContents(MountTestBase):
+    @profiled
+    def getProjectWithManyCollections(self):
+        project_contents = llfuse.listdir(self.mounttmp)
+        self.assertEqual(201, len(project_contents))
+        self.assertIn('Collection_1', project_contents)
+        return project_contents
+
+    @profiled
+    def listContentsInProjectWithManyCollections(self, project_contents):
+        project_contents = llfuse.listdir(self.mounttmp)
+        self.assertEqual(201, len(project_contents))
+        self.assertIn('Collection_1', project_contents)
+
+        for collection_name in project_contents:
+            collection_contents = llfuse.listdir(os.path.join(self.mounttmp, collection_name))
+            self.assertIn('baz', collection_contents)
+
+    def test_listLargeProjectContents(self):
+        self.make_mount(fuse.ProjectDirectory,
+                        project_object=run_test_server.fixture('groups')['project_with_201_collections'])
+        project_contents = self.getProjectWithManyCollections()
+        self.listContentsInProjectWithManyCollections(project_contents)
diff --git a/services/fuse/tests/prof.py b/services/fuse/tests/prof.py
new file mode 100644 (file)
index 0000000..49b9f24
--- /dev/null
@@ -0,0 +1,17 @@
+import time
+
+class CountTime(object):
+    def __init__(self, tag="", size=None):
+        self.tag = tag
+        self.size = size
+
+    def __enter__(self):
+        self.start = time.time()
+        return self
+
+    def __exit__(self, exc_type, exc_value, traceback):
+        sec = (time.time() - self.start)
+        th = ""
+        if self.size:
+            th = "throughput %s/sec" % (self.size / sec)
+        print "%s time %s micoseconds %s" % (self.tag, sec*1000000, th)
diff --git a/services/fuse/tests/test_inodes.py b/services/fuse/tests/test_inodes.py
new file mode 100644 (file)
index 0000000..61170d5
--- /dev/null
@@ -0,0 +1,149 @@
+import arvados_fuse
+import mock
+import unittest
+import llfuse
+
+class InodeTests(unittest.TestCase):
+    def test_inodes_basic(self):
+        cache = arvados_fuse.InodeCache(1000, 4)
+        inodes = arvados_fuse.Inodes(cache)
+
+        # Check that ent1 gets added to inodes
+        ent1 = mock.MagicMock()
+        ent1.in_use.return_value = False
+        ent1.persisted.return_value = True
+        ent1.clear.return_value = True
+        ent1.objsize.return_value = 500
+        inodes.add_entry(ent1)
+        self.assertIn(ent1.inode, inodes)
+        self.assertIs(inodes[ent1.inode], ent1)
+        self.assertEqual(500, cache.total())
+
+    def test_inodes_not_persisted(self):
+        cache = arvados_fuse.InodeCache(1000, 4)
+        inodes = arvados_fuse.Inodes(cache)
+
+        ent1 = mock.MagicMock()
+        ent1.in_use.return_value = False
+        ent1.persisted.return_value = True
+        ent1.clear.return_value = True
+        ent1.objsize.return_value = 500
+        inodes.add_entry(ent1)
+
+        # ent2 is not persisted, so it doesn't
+        # affect the cache total
+        ent2 = mock.MagicMock()
+        ent2.in_use.return_value = False
+        ent2.persisted.return_value = False
+        ent2.objsize.return_value = 600
+        inodes.add_entry(ent2)
+        self.assertEqual(500, cache.total())
+
+    def test_inode_cleared(self):
+        cache = arvados_fuse.InodeCache(1000, 4)
+        inodes = arvados_fuse.Inodes(cache)
+
+        # Check that ent1 gets added to inodes
+        ent1 = mock.MagicMock()
+        ent1.in_use.return_value = False
+        ent1.persisted.return_value = True
+        ent1.clear.return_value = True
+        ent1.objsize.return_value = 500
+        inodes.add_entry(ent1)
+
+        # ent3 is persisted, adding it should cause ent1 to get cleared
+        ent3 = mock.MagicMock()
+        ent3.in_use.return_value = False
+        ent3.persisted.return_value = True
+        ent3.objsize.return_value = 600
+        ent3.clear.return_value = True
+
+        self.assertFalse(ent1.clear.called)
+        inodes.add_entry(ent3)
+
+        # Won't clear anything because min_entries = 4
+        self.assertEqual(2, len(cache._entries))
+        self.assertFalse(ent1.clear.called)
+        self.assertEqual(1100, cache.total())
+
+        # Change min_entries
+        cache.min_entries = 1
+        cache.cap_cache()
+        self.assertEqual(600, cache.total())
+        self.assertTrue(ent1.clear.called)
+
+        # Touching ent1 should cause ent3 to get cleared
+        self.assertFalse(ent3.clear.called)
+        cache.touch(ent1)
+        self.assertTrue(ent3.clear.called)
+        self.assertEqual(500, cache.total())
+
+    def test_clear_false(self):
+        cache = arvados_fuse.InodeCache(1000, 4)
+        inodes = arvados_fuse.Inodes(cache)
+
+        ent1 = mock.MagicMock()
+        ent1.in_use.return_value = False
+        ent1.persisted.return_value = True
+        ent1.clear.return_value = True
+        ent1.objsize.return_value = 500
+        inodes.add_entry(ent1)
+
+        ent3 = mock.MagicMock()
+        ent3.in_use.return_value = False
+        ent3.persisted.return_value = True
+        ent3.objsize.return_value = 600
+        ent3.clear.return_value = True
+        inodes.add_entry(ent3)
+
+        cache.min_entries = 1
+
+        # ent1 and ent3 both return False from clear(), so neither can be evicted
+        ent1.clear.return_value = False
+        ent3.clear.return_value = False
+        ent1.clear.called = False
+        ent3.clear.called = False
+        self.assertFalse(ent1.clear.called)
+        self.assertFalse(ent3.clear.called)
+        cache.touch(ent3)
+        self.assertTrue(ent1.clear.called)
+        self.assertTrue(ent3.clear.called)
+        self.assertEqual(1100, cache.total())
+
+        # ent1's clear() still returns False, so ent3 is the one that
+        # gets cleared
+        ent1.clear.return_value = False
+        ent3.clear.return_value = True
+        ent1.clear.called = False
+        ent3.clear.called = False
+        cache.touch(ent3)
+        self.assertTrue(ent1.clear.called)
+        self.assertTrue(ent3.clear.called)
+        self.assertEqual(500, cache.total())
+
+    def test_delete(self):
+        cache = arvados_fuse.InodeCache(1000, 4)
+        inodes = arvados_fuse.Inodes(cache)
+
+        ent1 = mock.MagicMock()
+        ent1.in_use.return_value = False
+        ent1.persisted.return_value = True
+        ent1.clear.return_value = True
+        ent1.objsize.return_value = 500
+        inodes.add_entry(ent1)
+
+        ent3 = mock.MagicMock()
+        ent3.in_use.return_value = False
+        ent3.persisted.return_value = True
+        ent3.objsize.return_value = 600
+        ent3.clear.return_value = True
+
+        # Delete ent1
+        self.assertEqual(500, cache.total())
+        ent1.clear.return_value = True
+        ent1.ref_count = 0
+        with llfuse.lock:
+            inodes.del_entry(ent1)
+        self.assertEqual(0, cache.total())
+        cache.touch(ent3)
+        self.assertEqual(600, cache.total())
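
The five tests above repeat the same MagicMock setup for each cache entry.
A small factory along these lines (hypothetical, not part of the diff)
would collapse the duplication:

    import mock

    def make_mock_entry(size, persisted=True, clearable=True):
        # Mirrors the repeated setup in InodeTests.
        ent = mock.MagicMock()
        ent.in_use.return_value = False
        ent.persisted.return_value = persisted
        ent.clear.return_value = clearable
        ent.objsize.return_value = size
        return ent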
index 764a099149a452ffe4258f2f8a0b922fd381b83a..ff8883714512c8a8f6b6a0196a19cb128204d317 100644 (file)
@@ -12,46 +12,14 @@ import tempfile
 import threading
 import time
 import unittest
-
+import logging
+import multiprocessing
 import run_test_server
+import mock
 
-class MountTestBase(unittest.TestCase):
-    def setUp(self):
-        self.keeptmp = tempfile.mkdtemp()
-        os.environ['KEEP_LOCAL_STORE'] = self.keeptmp
-        self.mounttmp = tempfile.mkdtemp()
-        run_test_server.run()
-        run_test_server.authorize_with("admin")
-        self.api = arvados.safeapi.ThreadSafeApiCache(arvados.config.settings())
-
-    def make_mount(self, root_class, **root_kwargs):
-        operations = fuse.Operations(os.getuid(), os.getgid())
-        operations.inodes.add_entry(root_class(
-            llfuse.ROOT_INODE, operations.inodes, self.api, 0, **root_kwargs))
-        llfuse.init(operations, self.mounttmp, [])
-        threading.Thread(None, llfuse.main).start()
-        # wait until the driver is finished initializing
-        operations.initlock.wait()
-
-    def tearDown(self):
-        # llfuse.close is buggy, so use fusermount instead.
-        #llfuse.close(unmount=True)
-        count = 0
-        success = 1
-        while (count < 9 and success != 0):
-          success = subprocess.call(["fusermount", "-u", self.mounttmp])
-          time.sleep(0.5)
-          count += 1
-
-        os.rmdir(self.mounttmp)
-        shutil.rmtree(self.keeptmp)
-        run_test_server.reset()
-
-    def assertDirContents(self, subdir, expect_content):
-        path = self.mounttmp
-        if subdir:
-            path = os.path.join(path, subdir)
-        self.assertEqual(sorted(expect_content), sorted(os.listdir(path)))
+from mount_test_base import MountTestBase
+
+logger = logging.getLogger('arvados.arv-mount')
 
 
 class FuseMountTest(MountTestBase):
@@ -64,8 +32,8 @@ class FuseMountTest(MountTestBase):
         cw.write("data 1")
         cw.start_new_file('thing2.txt')
         cw.write("data 2")
-        cw.start_new_stream('dir1')
 
+        cw.start_new_stream('dir1')
         cw.start_new_file('thing3.txt')
         cw.write("data 3")
         cw.start_new_file('thing4.txt')
@@ -85,11 +53,11 @@ class FuseMountTest(MountTestBase):
         cw.write("data 8")
 
         cw.start_new_stream('edgecases')
-        for f in ":/./../.../-/*/\x01\\/ ".split("/"):
+        for f in ":/.../-/*/\x01\\/ ".split("/"):
             cw.start_new_file(f)
             cw.write('x')
 
-        for f in ":/../.../-/*/\x01\\/ ".split("/"):
+        for f in ":/.../-/*/\x01\\/ ".split("/"):
             cw.start_new_stream('edgecases/dirs/' + f)
             cw.start_new_file('x/x')
             cw.write('x')
@@ -98,7 +66,7 @@ class FuseMountTest(MountTestBase):
         self.api.collections().create(body={"manifest_text":cw.manifest_text()}).execute()
 
     def runTest(self):
-        self.make_mount(fuse.CollectionDirectory, collection=self.testcollection)
+        self.make_mount(fuse.CollectionDirectory, collection_record=self.testcollection)
 
         self.assertDirContents(None, ['thing1.txt', 'thing2.txt',
                                       'edgecases', 'dir1', 'dir2'])
@@ -106,9 +74,9 @@ class FuseMountTest(MountTestBase):
         self.assertDirContents('dir2', ['thing5.txt', 'thing6.txt', 'dir3'])
         self.assertDirContents('dir2/dir3', ['thing7.txt', 'thing8.txt'])
         self.assertDirContents('edgecases',
-                               "dirs/:/_/__/.../-/*/\x01\\/ ".split("/"))
+                               "dirs/:/.../-/*/\x01\\/ ".split("/"))
         self.assertDirContents('edgecases/dirs',
-                               ":/__/.../-/*/\x01\\/ ".split("/"))
+                               ":/.../-/*/\x01\\/ ".split("/"))
 
         files = {'thing1.txt': 'data 1',
                  'thing2.txt': 'data 2',
@@ -143,8 +111,8 @@ class FuseNoAPITest(MountTestBase):
 
 
 class FuseMagicTest(MountTestBase):
-    def setUp(self):
-        super(FuseMagicTest, self).setUp()
+    def setUp(self, api=None):
+        super(FuseMagicTest, self).setUp(api=api)
 
         cw = arvados.CollectionWriter()
 
@@ -152,12 +120,13 @@ class FuseMagicTest(MountTestBase):
         cw.write("data 1")
 
         self.testcollection = cw.finish()
-        self.api.collections().create(body={"manifest_text":cw.manifest_text()}).execute()
+        self.test_manifest = cw.manifest_text()
+        self.api.collections().create(body={"manifest_text":self.test_manifest}).execute()
 
     def runTest(self):
         self.make_mount(fuse.MagicDirectory)
 
-        mount_ls = os.listdir(self.mounttmp)
+        mount_ls = llfuse.listdir(self.mounttmp)
         self.assertIn('README', mount_ls)
         self.assertFalse(any(arvados.util.keep_locator_pattern.match(fn) or
                              arvados.util.uuid_pattern.match(fn)
@@ -166,11 +135,11 @@ class FuseMagicTest(MountTestBase):
         self.assertDirContents(self.testcollection, ['thing1.txt'])
         self.assertDirContents(os.path.join('by_id', self.testcollection),
                                ['thing1.txt'])
-        mount_ls = os.listdir(self.mounttmp)
+        mount_ls = llfuse.listdir(self.mounttmp)
         self.assertIn('README', mount_ls)
         self.assertIn(self.testcollection, mount_ls)
         self.assertIn(self.testcollection,
-                      os.listdir(os.path.join(self.mounttmp, 'by_id')))
+                      llfuse.listdir(os.path.join(self.mounttmp, 'by_id')))
 
         files = {}
         files[os.path.join(self.mounttmp, self.testcollection, 'thing1.txt')] = 'data 1'
@@ -184,15 +153,15 @@ class FuseTagsTest(MountTestBase):
     def runTest(self):
         self.make_mount(fuse.TagsDirectory)
 
-        d1 = os.listdir(self.mounttmp)
+        d1 = llfuse.listdir(self.mounttmp)
         d1.sort()
         self.assertEqual(['foo_tag'], d1)
 
-        d2 = os.listdir(os.path.join(self.mounttmp, 'foo_tag'))
+        d2 = llfuse.listdir(os.path.join(self.mounttmp, 'foo_tag'))
         d2.sort()
         self.assertEqual(['zzzzz-4zz18-fy296fx3hot09f7'], d2)
 
-        d3 = os.listdir(os.path.join(self.mounttmp, 'foo_tag', 'zzzzz-4zz18-fy296fx3hot09f7'))
+        d3 = llfuse.listdir(os.path.join(self.mounttmp, 'foo_tag', 'zzzzz-4zz18-fy296fx3hot09f7'))
         d3.sort()
         self.assertEqual(['foo'], d3)
 
@@ -208,12 +177,12 @@ class FuseTagsUpdateTest(MountTestBase):
     def runTest(self):
         self.make_mount(fuse.TagsDirectory, poll_time=1)
 
-        self.assertIn('foo_tag', os.listdir(self.mounttmp))
+        self.assertIn('foo_tag', llfuse.listdir(self.mounttmp))
 
         bar_uuid = run_test_server.fixture('collections')['bar_file']['uuid']
         self.tag_collection(bar_uuid, 'fuse_test_tag')
         time.sleep(1)
-        self.assertIn('fuse_test_tag', os.listdir(self.mounttmp))
+        self.assertIn('fuse_test_tag', llfuse.listdir(self.mounttmp))
         self.assertDirContents('fuse_test_tag', [bar_uuid])
 
         baz_uuid = run_test_server.fixture('collections')['baz_file']['uuid']
@@ -234,24 +203,23 @@ class FuseSharedTest(MountTestBase):
         # shared_dirs is a list of the directories exposed
         # by fuse.SharedDirectory (i.e. any object visible
         # to the current user)
-        shared_dirs = os.listdir(self.mounttmp)
+        shared_dirs = llfuse.listdir(self.mounttmp)
         shared_dirs.sort()
         self.assertIn('FUSE User', shared_dirs)
 
         # fuse_user_objs is a list of the objects owned by the FUSE
         # test user (which present as files in the 'FUSE User'
         # directory)
-        fuse_user_objs = os.listdir(os.path.join(self.mounttmp, 'FUSE User'))
+        fuse_user_objs = llfuse.listdir(os.path.join(self.mounttmp, 'FUSE User'))
         fuse_user_objs.sort()
-        self.assertEqual(['Empty collection.link',                # permission link on collection
-                          'FUSE Test Project',                    # project owned by user
+        self.assertEqual(['FUSE Test Project',                    # project owned by user
                           'collection #1 owned by FUSE',          # collection owned by user
                           'collection #2 owned by FUSE',          # collection owned by user
                           'pipeline instance owned by FUSE.pipelineInstance',  # pipeline instance owned by user
                       ], fuse_user_objs)
 
         # test_proj_files is a list of the files in the FUSE Test Project.
-        test_proj_files = os.listdir(os.path.join(self.mounttmp, 'FUSE User', 'FUSE Test Project'))
+        test_proj_files = llfuse.listdir(os.path.join(self.mounttmp, 'FUSE User', 'FUSE Test Project'))
         test_proj_files.sort()
         self.assertEqual(['collection in FUSE project',
                           'pipeline instance in FUSE project.pipelineInstance',
@@ -286,10 +254,10 @@ class FuseHomeTest(MountTestBase):
         self.make_mount(fuse.ProjectDirectory,
                         project_object=self.api.users().current().execute())
 
-        d1 = os.listdir(self.mounttmp)
+        d1 = llfuse.listdir(self.mounttmp)
         self.assertIn('Unrestricted public data', d1)
 
-        d2 = os.listdir(os.path.join(self.mounttmp, 'Unrestricted public data'))
+        d2 = llfuse.listdir(os.path.join(self.mounttmp, 'Unrestricted public data'))
         public_project = run_test_server.fixture('groups')[
             'anonymously_accessible_project']
         found_in = 0
@@ -309,10 +277,759 @@ class FuseHomeTest(MountTestBase):
         self.assertNotEqual(0, found_in)
         self.assertNotEqual(0, found_not_in)
 
-        d3 = os.listdir(os.path.join(self.mounttmp, 'Unrestricted public data', 'GNU General Public License, version 3'))
+        d3 = llfuse.listdir(os.path.join(self.mounttmp, 'Unrestricted public data', 'GNU General Public License, version 3'))
         self.assertEqual(["GNU_General_Public_License,_version_3.pdf"], d3)
 
 
+def fuseModifyFileTestHelperReadStartContents(mounttmp):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            d1 = llfuse.listdir(mounttmp)
+            self.assertEqual(["file1.txt"], d1)
+            with open(os.path.join(mounttmp, "file1.txt")) as f:
+                self.assertEqual("blub", f.read())
+    Test().runTest()
+
+def fuseModifyFileTestHelperReadEndContents(mounttmp):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            d1 = llfuse.listdir(mounttmp)
+            self.assertEqual(["file1.txt"], d1)
+            with open(os.path.join(mounttmp, "file1.txt")) as f:
+                self.assertEqual("plnp", f.read())
+    Test().runTest()
+
+class FuseModifyFileTest(MountTestBase):
+    def runTest(self):
+        collection = arvados.collection.Collection(api_client=self.api)
+        with collection.open("file1.txt", "w") as f:
+            f.write("blub")
+
+        collection.save_new()
+
+        m = self.make_mount(fuse.CollectionDirectory)
+        with llfuse.lock:
+            m.new_collection(collection.api_response(), collection)
+
+        self.pool.apply(fuseModifyFileTestHelperReadStartContents, (self.mounttmp,))
+
+        with collection.open("file1.txt", "w") as f:
+            f.write("plnp")
+
+        self.pool.apply(fuseModifyFileTestHelperReadEndContents, (self.mounttmp,))
+
+
+class FuseAddFileToCollectionTest(MountTestBase):
+    def runTest(self):
+        collection = arvados.collection.Collection(api_client=self.api)
+        with collection.open("file1.txt", "w") as f:
+            f.write("blub")
+
+        collection.save_new()
+
+        m = self.make_mount(fuse.CollectionDirectory)
+        with llfuse.lock:
+            m.new_collection(collection.api_response(), collection)
+
+        d1 = llfuse.listdir(self.mounttmp)
+        self.assertEqual(["file1.txt"], d1)
+
+        with collection.open("file2.txt", "w") as f:
+            f.write("plnp")
+
+        d1 = llfuse.listdir(self.mounttmp)
+        self.assertEqual(["file1.txt", "file2.txt"], sorted(d1))
+
+
+class FuseRemoveFileFromCollectionTest(MountTestBase):
+    def runTest(self):
+        collection = arvados.collection.Collection(api_client=self.api)
+        with collection.open("file1.txt", "w") as f:
+            f.write("blub")
+
+        with collection.open("file2.txt", "w") as f:
+            f.write("plnp")
+
+        collection.save_new()
+
+        m = self.make_mount(fuse.CollectionDirectory)
+        with llfuse.lock:
+            m.new_collection(collection.api_response(), collection)
+
+        d1 = llfuse.listdir(self.mounttmp)
+        self.assertEqual(["file1.txt", "file2.txt"], sorted(d1))
+
+        collection.remove("file2.txt")
+
+        d1 = llfuse.listdir(self.mounttmp)
+        self.assertEqual(["file1.txt"], d1)
+
+
+def fuseCreateFileTestHelper(mounttmp):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            with open(os.path.join(mounttmp, "file1.txt"), "w") as f:
+                pass
+    Test().runTest()
+
+class FuseCreateFileTest(MountTestBase):
+    def runTest(self):
+        collection = arvados.collection.Collection(api_client=self.api)
+        collection.save_new()
+
+        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+        self.assertEqual(collection2["manifest_text"], "")
+
+        collection.save_new()
+
+        m = self.make_mount(fuse.CollectionDirectory)
+        with llfuse.lock:
+            m.new_collection(collection.api_response(), collection)
+        self.assertTrue(m.writable())
+
+        self.assertNotIn("file1.txt", collection)
+
+        self.pool.apply(fuseCreateFileTestHelper, (self.mounttmp,))
+
+        self.assertIn("file1.txt", collection)
+
+        d1 = llfuse.listdir(self.mounttmp)
+        self.assertEqual(["file1.txt"], d1)
+
+        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+        self.assertRegexpMatches(collection2["manifest_text"],
+            r'\. d41d8cd98f00b204e9800998ecf8427e\+0\+A\S+ 0:0:file1\.txt$')
+
+
+def fuseWriteFileTestHelperWriteFile(mounttmp):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            with open(os.path.join(mounttmp, "file1.txt"), "w") as f:
+                f.write("Hello world!")
+    Test().runTest()
+
+def fuseWriteFileTestHelperReadFile(mounttmp):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            with open(os.path.join(mounttmp, "file1.txt"), "r") as f:
+                self.assertEqual(f.read(), "Hello world!")
+    Test().runTest()
+
+class FuseWriteFileTest(MountTestBase):
+    def runTest(self):
+        collection = arvados.collection.Collection(api_client=self.api)
+        collection.save_new()
+
+        m = self.make_mount(fuse.CollectionDirectory)
+        with llfuse.lock:
+            m.new_collection(collection.api_response(), collection)
+        self.assertTrue(m.writable())
+
+        self.assertNotIn("file1.txt", collection)
+
+        self.pool.apply(fuseWriteFileTestHelperWriteFile, (self.mounttmp,))
+
+        with collection.open("file1.txt") as f:
+            self.assertEqual(f.read(), "Hello world!")
+
+        self.pool.apply(fuseWriteFileTestHelperReadFile, (self.mounttmp,))
+
+        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+        self.assertRegexpMatches(collection2["manifest_text"],
+            r'\. 86fb269d190d2c85f6e0468ceca42a20\+12\+A\S+ 0:12:file1\.txt$')
+
+
+def fuseUpdateFileTestHelper(mounttmp):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            with open(os.path.join(mounttmp, "file1.txt"), "w") as f:
+                f.write("Hello world!")
+
+            with open(os.path.join(mounttmp, "file1.txt"), "r+") as f:
+                fr = f.read()
+                self.assertEqual(fr, "Hello world!")
+                f.seek(0)
+                f.write("Hola mundo!")
+                f.seek(0)
+                fr = f.read()
+                self.assertEqual(fr, "Hola mundo!!")
+
+            with open(os.path.join(mounttmp, "file1.txt"), "r") as f:
+                self.assertEqual(f.read(), "Hola mundo!!")
+
+    Test().runTest()
+
+class FuseUpdateFileTest(MountTestBase):
+    def runTest(self):
+        collection = arvados.collection.Collection(api_client=self.api)
+        collection.save_new()
+
+        m = self.make_mount(fuse.CollectionDirectory)
+        with llfuse.lock:
+            m.new_collection(collection.api_response(), collection)
+        self.assertTrue(m.writable())
+
+        # See note in MountTestBase.setUp
+        self.pool.apply(fuseUpdateFileTestHelper, (self.mounttmp,))
+
+        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+        self.assertRegexpMatches(collection2["manifest_text"],
+            r'\. daaef200ebb921e011e3ae922dd3266b\+11\+A\S+ 86fb269d190d2c85f6e0468ceca42a20\+12\+A\S+ 0:11:file1\.txt 22:1:file1\.txt$')
+
+
+def fuseMkdirTestHelper(mounttmp):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            with self.assertRaises(IOError):
+                with open(os.path.join(mounttmp, "testdir", "file1.txt"), "w") as f:
+                    f.write("Hello world!")
+
+            os.mkdir(os.path.join(mounttmp, "testdir"))
+
+            with self.assertRaises(OSError):
+                os.mkdir(os.path.join(mounttmp, "testdir"))
+
+            d1 = llfuse.listdir(mounttmp)
+            self.assertEqual(["testdir"], d1)
+
+            with open(os.path.join(mounttmp, "testdir", "file1.txt"), "w") as f:
+                f.write("Hello world!")
+
+            d1 = llfuse.listdir(os.path.join(mounttmp, "testdir"))
+            self.assertEqual(["file1.txt"], d1)
+
+    Test().runTest()
+
+class FuseMkdirTest(MountTestBase):
+    def runTest(self):
+        collection = arvados.collection.Collection(api_client=self.api)
+        collection.save_new()
+
+        m = self.make_mount(fuse.CollectionDirectory)
+        with llfuse.lock:
+            m.new_collection(collection.api_response(), collection)
+        self.assertTrue(m.writable())
+
+        self.pool.apply(fuseMkdirTestHelper, (self.mounttmp,))
+
+        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+        self.assertRegexpMatches(collection2["manifest_text"],
+            r'\./testdir 86fb269d190d2c85f6e0468ceca42a20\+12\+A\S+ 0:12:file1\.txt$')
+
+
+def fuseRmTestHelperWriteFile(mounttmp):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            os.mkdir(os.path.join(mounttmp, "testdir"))
+
+            with open(os.path.join(mounttmp, "testdir", "file1.txt"), "w") as f:
+                f.write("Hello world!")
+
+    Test().runTest()
+
+def fuseRmTestHelperDeleteFile(mounttmp):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            # Can't delete because it's not empty
+            with self.assertRaises(OSError):
+                os.rmdir(os.path.join(mounttmp, "testdir"))
+
+            d1 = llfuse.listdir(os.path.join(mounttmp, "testdir"))
+            self.assertEqual(["file1.txt"], d1)
+
+            # Delete file
+            os.remove(os.path.join(mounttmp, "testdir", "file1.txt"))
+
+            # Make sure it's empty
+            d1 = llfuse.listdir(os.path.join(mounttmp, "testdir"))
+            self.assertEqual([], d1)
+
+            # Try to delete it again
+            with self.assertRaises(OSError):
+                os.remove(os.path.join(mounttmp, "testdir", "file1.txt"))
+
+    Test().runTest()
+
+def fuseRmTestHelperRmdir(mounttmp):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            # Should be able to delete now that it is empty
+            os.rmdir(os.path.join(mounttmp, "testdir"))
+
+            # Make sure it's empty
+            d1 = llfuse.listdir(os.path.join(mounttmp))
+            self.assertEqual([], d1)
+
+            # Try to delete it again
+            with self.assertRaises(OSError):
+                os.rmdir(os.path.join(mounttmp, "testdir"))
+
+    Test().runTest()
+
+class FuseRmTest(MountTestBase):
+    def runTest(self):
+        collection = arvados.collection.Collection(api_client=self.api)
+        collection.save_new()
+
+        m = self.make_mount(fuse.CollectionDirectory)
+        with llfuse.lock:
+            m.new_collection(collection.api_response(), collection)
+        self.assertTrue(m.writable())
+
+        self.pool.apply(fuseRmTestHelperWriteFile, (self.mounttmp,))
+
+        # Starting manifest
+        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+        self.assertRegexpMatches(collection2["manifest_text"],
+            r'\./testdir 86fb269d190d2c85f6e0468ceca42a20\+12\+A\S+ 0:12:file1\.txt$')
+        self.pool.apply(fuseRmTestHelperDeleteFile, (self.mounttmp,))
+
+        # Manifests can't represent empty directories :-( so the manifest will be empty.
+        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+        self.assertEqual(collection2["manifest_text"], "")
+
+        self.pool.apply(fuseRmTestHelperRmdir, (self.mounttmp,))
+
+        # manifest should be empty now.
+        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+        self.assertEqual(collection2["manifest_text"], "")
+
+
+def fuseMvFileTestHelperWriteFile(mounttmp):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            os.mkdir(os.path.join(mounttmp, "testdir"))
+
+            with open(os.path.join(mounttmp, "testdir", "file1.txt"), "w") as f:
+                f.write("Hello world!")
+
+    Test().runTest()
+
+def fuseMvFileTestHelperMoveFile(mounttmp):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            d1 = llfuse.listdir(os.path.join(mounttmp))
+            self.assertEqual(["testdir"], d1)
+            d1 = llfuse.listdir(os.path.join(mounttmp, "testdir"))
+            self.assertEqual(["file1.txt"], d1)
+
+            os.rename(os.path.join(mounttmp, "testdir", "file1.txt"), os.path.join(mounttmp, "file1.txt"))
+
+            d1 = llfuse.listdir(os.path.join(mounttmp))
+            self.assertEqual(["file1.txt", "testdir"], sorted(d1))
+            d1 = llfuse.listdir(os.path.join(mounttmp, "testdir"))
+            self.assertEqual([], d1)
+
+    Test().runTest()
+
+class FuseMvFileTest(MountTestBase):
+    def runTest(self):
+        collection = arvados.collection.Collection(api_client=self.api)
+        collection.save_new()
+
+        m = self.make_mount(fuse.CollectionDirectory)
+        with llfuse.lock:
+            m.new_collection(collection.api_response(), collection)
+        self.assertTrue(m.writable())
+
+        self.pool.apply(fuseMvFileTestHelperWriteFile, (self.mounttmp,))
+
+        # Starting manifest
+        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+        self.assertRegexpMatches(collection2["manifest_text"],
+            r'\./testdir 86fb269d190d2c85f6e0468ceca42a20\+12\+A\S+ 0:12:file1\.txt$')
+
+        self.pool.apply(fuseMvFileTestHelperMoveFile, (self.mounttmp,))
+
+        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+        self.assertRegexpMatches(collection2["manifest_text"],
+            r'\. 86fb269d190d2c85f6e0468ceca42a20\+12\+A\S+ 0:12:file1\.txt$')
+
+
+def fuseRenameTestHelper(mounttmp):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            os.mkdir(os.path.join(mounttmp, "testdir"))
+
+            with open(os.path.join(mounttmp, "testdir", "file1.txt"), "w") as f:
+                f.write("Hello world!")
+
+    Test().runTest()
+
+class FuseRenameTest(MountTestBase):
+    def runTest(self):
+        collection = arvados.collection.Collection(api_client=self.api)
+        collection.save_new()
+
+        m = self.make_mount(fuse.CollectionDirectory)
+        with llfuse.lock:
+            m.new_collection(collection.api_response(), collection)
+        self.assertTrue(m.writable())
+
+        self.pool.apply(fuseRenameTestHelper, (self.mounttmp,))
+
+        # Starting manifest
+        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+        self.assertRegexpMatches(collection2["manifest_text"],
+            r'\./testdir 86fb269d190d2c85f6e0468ceca42a20\+12\+A\S+ 0:12:file1\.txt$')
+
+        d1 = llfuse.listdir(os.path.join(self.mounttmp))
+        self.assertEqual(["testdir"], d1)
+        d1 = llfuse.listdir(os.path.join(self.mounttmp, "testdir"))
+        self.assertEqual(["file1.txt"], d1)
+
+        os.rename(os.path.join(self.mounttmp, "testdir"), os.path.join(self.mounttmp, "testdir2"))
+
+        d1 = llfuse.listdir(os.path.join(self.mounttmp))
+        self.assertEqual(["testdir2"], sorted(d1))
+        d1 = llfuse.listdir(os.path.join(self.mounttmp, "testdir2"))
+        self.assertEqual(["file1.txt"], d1)
+
+        collection2 = self.api.collections().get(uuid=collection.manifest_locator()).execute()
+        self.assertRegexpMatches(collection2["manifest_text"],
+            r'\./testdir2 86fb269d190d2c85f6e0468ceca42a20\+12\+A\S+ 0:12:file1\.txt$')
+
+
+class FuseUpdateFromEventTest(MountTestBase):
+    def runTest(self):
+        collection = arvados.collection.Collection(api_client=self.api)
+        collection.save_new()
+
+        m = self.make_mount(fuse.CollectionDirectory)
+        with llfuse.lock:
+            m.new_collection(collection.api_response(), collection)
+
+        self.operations.listen_for_events(self.api)
+
+        d1 = llfuse.listdir(os.path.join(self.mounttmp))
+        self.assertEqual([], sorted(d1))
+
+        with arvados.collection.Collection(collection.manifest_locator(), api_client=self.api) as collection2:
+            with collection2.open("file1.txt", "w") as f:
+                f.write("foo")
+
+        time.sleep(1)
+
+        # should show up via event bus notify
+
+        d1 = llfuse.listdir(os.path.join(self.mounttmp))
+        self.assertEqual(["file1.txt"], sorted(d1))
+
+
+def fuseFileConflictTestHelper(mounttmp):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            with open(os.path.join(mounttmp, "file1.txt"), "w") as f:
+                f.write("bar")
+
+            d1 = sorted(llfuse.listdir(os.path.join(mounttmp)))
+            self.assertEqual(len(d1), 2)
+
+            with open(os.path.join(mounttmp, "file1.txt"), "r") as f:
+                self.assertEqual(f.read(), "bar")
+
+            self.assertRegexpMatches(d1[1],
+                r'file1\.txt~\d\d\d\d\d\d\d\d-\d\d\d\d\d\d~conflict~')
+
+            with open(os.path.join(mounttmp, d1[1]), "r") as f:
+                self.assertEqual(f.read(), "foo")
+
+    Test().runTest()
+
+class FuseFileConflictTest(MountTestBase):
+    def runTest(self):
+        collection = arvados.collection.Collection(api_client=self.api)
+        collection.save_new()
+
+        m = self.make_mount(fuse.CollectionDirectory)
+        with llfuse.lock:
+            m.new_collection(collection.api_response(), collection)
+
+        d1 = llfuse.listdir(os.path.join(self.mounttmp))
+        self.assertEqual([], sorted(d1))
+
+        with arvados.collection.Collection(collection.manifest_locator(), api_client=self.api) as collection2:
+            with collection2.open("file1.txt", "w") as f:
+                f.write("foo")
+
+        # See note in MountTestBase.setUp
+        self.pool.apply(fuseFileConflictTestHelper, (self.mounttmp,))
+
+
+def fuseUnlinkOpenFileTest(mounttmp):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            with open(os.path.join(mounttmp, "file1.txt"), "w+") as f:
+                f.write("foo")
+
+                d1 = llfuse.listdir(os.path.join(mounttmp))
+                self.assertEqual(["file1.txt"], sorted(d1))
+
+                os.remove(os.path.join(mounttmp, "file1.txt"))
+
+                d1 = llfuse.listdir(os.path.join(mounttmp))
+                self.assertEqual([], sorted(d1))
+
+                f.seek(0)
+                self.assertEqual(f.read(), "foo")
+                f.write("bar")
+
+                f.seek(0)
+                self.assertEqual(f.read(), "foobar")
+
+    Test().runTest()
+
+class FuseUnlinkOpenFileTest(MountTestBase):
+    def runTest(self):
+        collection = arvados.collection.Collection(api_client=self.api)
+        collection.save_new()
+
+        m = self.make_mount(fuse.CollectionDirectory)
+        with llfuse.lock:
+            m.new_collection(collection.api_response(), collection)
+
+        # See note in MountTestBase.setUp
+        self.pool.apply(fuseUnlinkOpenFileTest, (self.mounttmp,))
+
+        self.assertEqual(collection.manifest_text(), "")
+
+
+def fuseMvFileBetweenCollectionsTest1(mounttmp, uuid1, uuid2):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            with open(os.path.join(mounttmp, uuid1, "file1.txt"), "w") as f:
+                f.write("Hello world!")
+
+            d1 = os.listdir(os.path.join(mounttmp, uuid1))
+            self.assertEqual(["file1.txt"], sorted(d1))
+            d1 = os.listdir(os.path.join(mounttmp, uuid2))
+            self.assertEqual([], sorted(d1))
+
+    Test().runTest()
+
+def fuseMvFileBetweenCollectionsTest2(mounttmp, uuid1, uuid2):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            os.rename(os.path.join(mounttmp, uuid1, "file1.txt"), os.path.join(mounttmp, uuid2, "file2.txt"))
+
+            d1 = os.listdir(os.path.join(mounttmp, uuid1))
+            self.assertEqual([], sorted(d1))
+            d1 = os.listdir(os.path.join(mounttmp, uuid2))
+            self.assertEqual(["file2.txt"], sorted(d1))
+
+    Test().runTest()
+
+class FuseMvFileBetweenCollectionsTest(MountTestBase):
+    def runTest(self):
+        collection1 = arvados.collection.Collection(api_client=self.api)
+        collection1.save_new()
+
+        collection2 = arvados.collection.Collection(api_client=self.api)
+        collection2.save_new()
+
+        m = self.make_mount(fuse.MagicDirectory)
+
+        # See note in MountTestBase.setUp
+        self.pool.apply(fuseMvFileBetweenCollectionsTest1, (self.mounttmp,
+                                                  collection1.manifest_locator(),
+                                                  collection2.manifest_locator()))
+
+        collection1.update()
+        collection2.update()
+
+        self.assertRegexpMatches(collection1.manifest_text(), r"\. 86fb269d190d2c85f6e0468ceca42a20\+12\+A\S+ 0:12:file1\.txt$")
+        self.assertEqual(collection2.manifest_text(), "")
+
+        self.pool.apply(fuseMvFileBetweenCollectionsTest2, (self.mounttmp,
+                                                  collection1.manifest_locator(),
+                                                  collection2.manifest_locator()))
+
+        collection1.update()
+        collection2.update()
+
+        self.assertEqual(collection1.manifest_text(), "")
+        self.assertRegexpMatches(collection2.manifest_text(), r"\. 86fb269d190d2c85f6e0468ceca42a20\+12\+A\S+ 0:12:file2\.txt$")
+
+        collection1.stop_threads()
+        collection2.stop_threads()
+
+
+def fuseMvDirBetweenCollectionsTest1(mounttmp, uuid1, uuid2):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            os.mkdir(os.path.join(mounttmp, uuid1, "testdir"))
+            with open(os.path.join(mounttmp, uuid1, "testdir", "file1.txt"), "w") as f:
+                f.write("Hello world!")
+
+            d1 = os.listdir(os.path.join(mounttmp, uuid1))
+            self.assertEqual(["testdir"], sorted(d1))
+            d1 = os.listdir(os.path.join(mounttmp, uuid1, "testdir"))
+            self.assertEqual(["file1.txt"], sorted(d1))
+
+            d1 = os.listdir(os.path.join(mounttmp, uuid2))
+            self.assertEqual([], sorted(d1))
+
+    Test().runTest()
+
+
+def fuseMvDirBetweenCollectionsTest2(mounttmp, uuid1, uuid2):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            os.rename(os.path.join(mounttmp, uuid1, "testdir"), os.path.join(mounttmp, uuid2, "testdir2"))
+
+            d1 = os.listdir(os.path.join(mounttmp, uuid1))
+            self.assertEqual([], sorted(d1))
+
+            d1 = os.listdir(os.path.join(mounttmp, uuid2))
+            self.assertEqual(["testdir2"], sorted(d1))
+            d1 = os.listdir(os.path.join(mounttmp, uuid2, "testdir2"))
+            self.assertEqual(["file1.txt"], sorted(d1))
+
+            with open(os.path.join(mounttmp, uuid2, "testdir2", "file1.txt"), "r") as f:
+                self.assertEqual(f.read(), "Hello world!")
+
+    Test().runTest()
+
+class FuseMvDirBetweenCollectionsTest(MountTestBase):
+    def runTest(self):
+        collection1 = arvados.collection.Collection(api_client=self.api)
+        collection1.save_new()
+
+        collection2 = arvados.collection.Collection(api_client=self.api)
+        collection2.save_new()
+
+        m = self.make_mount(fuse.MagicDirectory)
+
+        # See note in MountTestBase.setUp
+        self.pool.apply(fuseMvDirBetweenCollectionsTest1, (self.mounttmp,
+                                                  collection1.manifest_locator(),
+                                                  collection2.manifest_locator()))
+
+        collection1.update()
+        collection2.update()
+
+        self.assertRegexpMatches(collection1.manifest_text(), r"\./testdir 86fb269d190d2c85f6e0468ceca42a20\+12\+A\S+ 0:12:file1\.txt$")
+        self.assertEqual(collection2.manifest_text(), "")
+
+        self.pool.apply(fuseMvDirBetweenCollectionsTest2, (self.mounttmp,
+                                                  collection1.manifest_locator(),
+                                                  collection2.manifest_locator()))
+
+        collection1.update()
+        collection2.update()
+
+        self.assertEqual(collection1.manifest_text(), "")
+        self.assertRegexpMatches(collection2.manifest_text(), r"\./testdir2 86fb269d190d2c85f6e0468ceca42a20\+12\+A\S+ 0:12:file1\.txt$")
+
+        collection1.stop_threads()
+        collection2.stop_threads()
+
+def fuseProjectMkdirTestHelper1(mounttmp):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            os.mkdir(os.path.join(mounttmp, "testcollection"))
+            with self.assertRaises(OSError):
+                os.mkdir(os.path.join(mounttmp, "testcollection"))
+    Test().runTest()
+
+def fuseProjectMkdirTestHelper2(mounttmp):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            with open(os.path.join(mounttmp, "testcollection", "file1.txt"), "w") as f:
+                f.write("Hello world!")
+            with self.assertRaises(OSError):
+                os.rmdir(os.path.join(mounttmp, "testcollection"))
+            os.remove(os.path.join(mounttmp, "testcollection", "file1.txt"))
+            with self.assertRaises(OSError):
+                os.remove(os.path.join(mounttmp, "testcollection"))
+            os.rmdir(os.path.join(mounttmp, "testcollection"))
+    Test().runTest()
+
+class FuseProjectMkdirRmdirTest(MountTestBase):
+    def runTest(self):
+        self.make_mount(fuse.ProjectDirectory,
+                        project_object=self.api.users().current().execute())
+
+        d1 = llfuse.listdir(self.mounttmp)
+        self.assertNotIn('testcollection', d1)
+
+        self.pool.apply(fuseProjectMkdirTestHelper1, (self.mounttmp,))
+
+        d1 = llfuse.listdir(self.mounttmp)
+        self.assertIn('testcollection', d1)
+
+        self.pool.apply(fuseProjectMkdirTestHelper2, (self.mounttmp,))
+
+        d1 = llfuse.listdir(self.mounttmp)
+        self.assertNotIn('testcollection', d1)
+
+
+def fuseProjectMvTestHelper1(mounttmp):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            d1 = llfuse.listdir(mounttmp)
+            self.assertNotIn('testcollection', d1)
+
+            os.mkdir(os.path.join(mounttmp, "testcollection"))
+
+            d1 = llfuse.listdir(mounttmp)
+            self.assertIn('testcollection', d1)
+
+            with self.assertRaises(OSError):
+                os.rename(os.path.join(mounttmp, "testcollection"), os.path.join(mounttmp, 'Unrestricted public data'))
+
+            os.rename(os.path.join(mounttmp, "testcollection"), os.path.join(mounttmp, 'Unrestricted public data', 'testcollection'))
+
+            d1 = llfuse.listdir(mounttmp)
+            self.assertNotIn('testcollection', d1)
+
+            d1 = llfuse.listdir(os.path.join(mounttmp, 'Unrestricted public data'))
+            self.assertIn('testcollection', d1)
+
+    Test().runTest()
+
+class FuseProjectMvTest(MountTestBase):
+    def runTest(self):
+        self.make_mount(fuse.ProjectDirectory,
+                        project_object=self.api.users().current().execute())
+
+        self.pool.apply(fuseProjectMvTestHelper1, (self.mounttmp,))
+
+
+def fuseFsyncTestHelper(mounttmp, k):
+    class Test(unittest.TestCase):
+        def runTest(self):
+            fd = os.open(os.path.join(mounttmp, k), os.O_RDONLY)
+            os.fsync(fd)
+            os.close(fd)
+
+    Test().runTest()
+
+class FuseFsyncTest(FuseMagicTest):
+    def runTest(self):
+        self.make_mount(fuse.MagicDirectory)
+        self.pool.apply(fuseFsyncTestHelper, (self.mounttmp, self.testcollection))
+
+
+class MagicDirApiError(FuseMagicTest):
+    def setUp(self):
+        api = mock.MagicMock()
+        super(MagicDirApiError, self).setUp(api=api)
+        api.collections().get().execute.side_effect = iter([Exception('API fail'), {"manifest_text": self.test_manifest}])
+        api.keep.get.side_effect = Exception('Keep fail')
+
+    def runTest(self):
+        self.make_mount(fuse.MagicDirectory)
+
+        self.operations.inodes.inode_cache.cap = 1
+        self.operations.inodes.inode_cache.min_entries = 2
+
+        with self.assertRaises(OSError):
+            llfuse.listdir(os.path.join(self.mounttmp, self.testcollection))
+
+        llfuse.listdir(os.path.join(self.mounttmp, self.testcollection))
+
+
 class FuseUnitTest(unittest.TestCase):
     def test_sanitize_filename(self):
         acceptable = [
diff --git a/services/keepproxy/.gitignore b/services/keepproxy/.gitignore
new file mode 100644 (file)
index 0000000..a4c8ad9
--- /dev/null
@@ -0,0 +1 @@
+keepproxy
index b8c30d113ea38447266aeef3b1c579f80d877ee3..d0af4a58ea5e7746d8243fb6272820e8c4801307 100644 (file)
@@ -1,6 +1,7 @@
 package main
 
 import (
+       "errors"
        "flag"
        "fmt"
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
@@ -13,6 +14,8 @@ import (
        "net/http"
        "os"
        "os/signal"
+       "reflect"
+       "regexp"
        "sync"
        "syscall"
        "time"
@@ -105,7 +108,7 @@ func main() {
                log.Fatalf("Could not listen on %v", listen)
        }
 
-       go RefreshServicesList(&kc)
+       go RefreshServicesList(kc)
 
        // Shut down the server gracefully (by closing the listener)
        // if SIGTERM is received.
@@ -118,10 +121,10 @@ func main() {
        signal.Notify(term, syscall.SIGTERM)
        signal.Notify(term, syscall.SIGINT)
 
-       log.Printf("Arvados Keep proxy started listening on %v with server list %v", listener.Addr(), kc.ServiceRoots())
+       log.Printf("Arvados Keep proxy started listening on %v", listener.Addr())
 
        // Start listening for requests.
-       http.Serve(listener, MakeRESTRouter(!no_get, !no_put, &kc))
+       http.Serve(listener, MakeRESTRouter(!no_get, !no_put, kc))
 
        log.Println("shutting down")
 }
@@ -134,16 +137,25 @@ type ApiTokenCache struct {
 
 // Refresh the keep service list every five minutes.
 func RefreshServicesList(kc *keepclient.KeepClient) {
+       var previousRoots = []map[string]string{}
+       var delay time.Duration = 0
        for {
-               time.Sleep(300 * time.Second)
-               oldservices := kc.ServiceRoots()
-               kc.DiscoverKeepServers()
-               newservices := kc.ServiceRoots()
-               s1 := fmt.Sprint(oldservices)
-               s2 := fmt.Sprint(newservices)
-               if s1 != s2 {
-                       log.Printf("Updated server list to %v", s2)
+               time.Sleep(delay * time.Second)
+               delay = 300
+               if err := kc.DiscoverKeepServers(); err != nil {
+                       log.Println("Error retrieving services list:", err)
+                       delay = 3
+                       continue
                }
+               newRoots := []map[string]string{kc.LocalRoots(), kc.GatewayRoots()}
+               if !reflect.DeepEqual(previousRoots, newRoots) {
+                       log.Printf("Updated services list: locals %v gateways %v", newRoots[0], newRoots[1])
+               }
+               if len(newRoots[0]) == 0 {
+                       log.Print("WARNING: No local services. Retrying in 3 seconds.")
+                       delay = 3
+               }
+               previousRoots = newRoots
        }
 }
 
@@ -247,14 +259,14 @@ func MakeRESTRouter(
        rest := mux.NewRouter()
 
        if enable_get {
-               rest.Handle(`/{hash:[0-9a-f]{32}}+{hints}`,
+               rest.Handle(`/{locator:[0-9a-f]{32}\+.*}`,
                        GetBlockHandler{kc, t}).Methods("GET", "HEAD")
-               rest.Handle(`/{hash:[0-9a-f]{32}}`, GetBlockHandler{kc, t}).Methods("GET", "HEAD")
+               rest.Handle(`/{locator:[0-9a-f]{32}}`, GetBlockHandler{kc, t}).Methods("GET", "HEAD")
        }
 
        if enable_put {
-               rest.Handle(`/{hash:[0-9a-f]{32}}+{hints}`, PutBlockHandler{kc, t}).Methods("PUT")
-               rest.Handle(`/{hash:[0-9a-f]{32}}`, PutBlockHandler{kc, t}).Methods("PUT")
+               rest.Handle(`/{locator:[0-9a-f]{32}\+.*}`, PutBlockHandler{kc, t}).Methods("PUT")
+               rest.Handle(`/{locator:[0-9a-f]{32}}`, PutBlockHandler{kc, t}).Methods("PUT")
                rest.Handle(`/`, PutBlockHandler{kc, t}).Methods("POST")
                rest.Handle(`/{any}`, OptionsHandler{}).Methods("OPTIONS")
                rest.Handle(`/`, OptionsHandler{}).Methods("OPTIONS")
@@ -282,22 +294,34 @@ func (this OptionsHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request
        SetCorsHeaders(resp)
 }
 
-func (this GetBlockHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
-       SetCorsHeaders(resp)
+var BadAuthorizationHeader = errors.New("Missing or invalid Authorization header")
+var ContentLengthMismatch = errors.New("Actual length != expected content length")
+var MethodNotSupported = errors.New("Method not supported")
 
-       kc := *this.KeepClient
+var removeHint = regexp.MustCompile("\\+K@[a-z0-9]{5}(\\+|$)")
 
-       hash := mux.Vars(req)["hash"]
-       hints := mux.Vars(req)["hints"]
+func (this GetBlockHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
+       SetCorsHeaders(resp)
 
-       locator := keepclient.MakeLocator2(hash, hints)
+       locator := mux.Vars(req)["locator"]
+       var err error
+       var status int
+       var expectLength, responseLength int64
+       var proxiedURI = "-"
+
+       defer func() {
+               log.Println(GetRemoteAddress(req), req.Method, req.URL.Path, status, expectLength, responseLength, proxiedURI, err)
+               if status != http.StatusOK {
+                       http.Error(resp, err.Error(), status)
+               }
+       }()
 
-       log.Printf("%s: %s %s begin", GetRemoteAddress(req), req.Method, hash)
+       kc := *this.KeepClient
 
        var pass bool
        var tok string
        if pass, tok = CheckAuthorizationHeader(kc, this.ApiTokenCache, req); !pass {
-               http.Error(resp, "Missing or invalid Authorization header", http.StatusForbidden)
+               status, err = http.StatusForbidden, BadAuthorizationHeader
                return
        }
 
@@ -307,92 +331,99 @@ func (this GetBlockHandler) ServeHTTP(resp http.ResponseWriter, req *http.Reques
        kc.Arvados = &arvclient
 
        var reader io.ReadCloser
-       var err error
-       var blocklen int64
 
-       if req.Method == "GET" {
-               reader, blocklen, _, err = kc.AuthorizedGet(hash, locator.Signature, locator.Timestamp)
+       locator = removeHint.ReplaceAllString(locator, "$1")
+
+       switch req.Method {
+       case "HEAD":
+               expectLength, proxiedURI, err = kc.Ask(locator)
+       case "GET":
+               reader, expectLength, proxiedURI, err = kc.Get(locator)
                if reader != nil {
                        defer reader.Close()
                }
-       } else if req.Method == "HEAD" {
-               blocklen, _, err = kc.AuthorizedAsk(hash, locator.Signature, locator.Timestamp)
+       default:
+               status, err = http.StatusNotImplemented, MethodNotSupported
+               return
        }
 
-       if blocklen == -1 {
-               log.Printf("%s: %s %s Keep server did not return Content-Length",
-                       GetRemoteAddress(req), req.Method, hash)
+       if expectLength == -1 {
+               log.Println("Warning:", GetRemoteAddress(req), req.Method, proxiedURI, "Content-Length not provided")
        }
 
-       var status = 0
        switch err {
        case nil:
                status = http.StatusOK
-               resp.Header().Set("Content-Length", fmt.Sprint(blocklen))
-               if reader != nil {
-                       n, err2 := io.Copy(resp, reader)
-                       if blocklen > -1 && n != blocklen {
-                               log.Printf("%s: %s %s %v %v mismatched copy size expected Content-Length: %v",
-                                       GetRemoteAddress(req), req.Method, hash, status, n, blocklen)
-                       } else if err2 == nil {
-                               log.Printf("%s: %s %s %v %v",
-                                       GetRemoteAddress(req), req.Method, hash, status, n)
-                       } else {
-                               log.Printf("%s: %s %s %v %v copy error: %v",
-                                       GetRemoteAddress(req), req.Method, hash, status, n, err2.Error())
+               resp.Header().Set("Content-Length", fmt.Sprint(expectLength))
+               switch req.Method {
+               case "HEAD":
+                       responseLength = 0
+               case "GET":
+                       responseLength, err = io.Copy(resp, reader)
+                       if err == nil && expectLength > -1 && responseLength != expectLength {
+                               err = ContentLengthMismatch
                        }
-               } else {
-                       log.Printf("%s: %s %s %v 0", GetRemoteAddress(req), req.Method, hash, status)
                }
        case keepclient.BlockNotFound:
                status = http.StatusNotFound
-               http.Error(resp, "Not Found", http.StatusNotFound)
        default:
                status = http.StatusBadGateway
-               http.Error(resp, err.Error(), http.StatusBadGateway)
-       }
-
-       if err != nil {
-               log.Printf("%s: %s %s %v error: %v",
-                       GetRemoteAddress(req), req.Method, hash, status, err.Error())
        }
 }
 
+var LengthRequiredError = errors.New(http.StatusText(http.StatusLengthRequired))
+var LengthMismatchError = errors.New("Locator size hint does not match Content-Length header")
+
 func (this PutBlockHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
        SetCorsHeaders(resp)
 
        kc := *this.KeepClient
+       var err error
+       var expectLength int64 = -1
+       var status = http.StatusInternalServerError
+       var wroteReplicas int
+       var locatorOut string = "-"
+
+       defer func() {
+               log.Println(GetRemoteAddress(req), req.Method, req.URL.Path, status, expectLength, kc.Want_replicas, wroteReplicas, locatorOut, err)
+               if status != http.StatusOK {
+                       http.Error(resp, err.Error(), status)
+               }
+       }()
 
-       hash := mux.Vars(req)["hash"]
-       hints := mux.Vars(req)["hints"]
-
-       locator := keepclient.MakeLocator2(hash, hints)
+       locatorIn := mux.Vars(req)["locator"]
 
-       var contentLength int64 = -1
        if req.Header.Get("Content-Length") != "" {
-               _, err := fmt.Sscanf(req.Header.Get("Content-Length"), "%d", &contentLength)
+               _, err := fmt.Sscanf(req.Header.Get("Content-Length"), "%d", &expectLength)
-               if err != nil {
-                       resp.Header().Set("Content-Length", fmt.Sprintf("%d", contentLength))
-               }
+               // Echo the header back only when it parses; otherwise
+               // expectLength stays -1 and the length-required check
+               // below rejects the request.
+               if err == nil {
+                       resp.Header().Set("Content-Length", fmt.Sprintf("%d", expectLength))
+               }
 
        }
 
-       log.Printf("%s: %s %s Content-Length %v", GetRemoteAddress(req), req.Method, hash, contentLength)
-
-       if contentLength < 0 {
-               http.Error(resp, "Must include Content-Length header", http.StatusLengthRequired)
+       if expectLength < 0 {
+               err = LengthRequiredError
+               status = http.StatusLengthRequired
                return
        }
 
-       if locator.Size > 0 && int64(locator.Size) != contentLength {
-               http.Error(resp, "Locator size hint does not match Content-Length header", http.StatusBadRequest)
-               return
+       if locatorIn != "" {
+               var loc *keepclient.Locator
+               if loc, err = keepclient.MakeLocator(locatorIn); err != nil {
+                       status = http.StatusBadRequest
+                       return
+               } else if loc.Size > 0 && int64(loc.Size) != expectLength {
+                       err = LengthMismatchError
+                       status = http.StatusBadRequest
+                       return
+               }
        }
 
        var pass bool
        var tok string
        if pass, tok = CheckAuthorizationHeader(kc, this.ApiTokenCache, req); !pass {
-               http.Error(resp, "Missing or invalid Authorization header", http.StatusForbidden)
+               err = BadAuthorizationHeader
+               status = http.StatusForbidden
                return
        }
 
@@ -411,57 +442,42 @@ func (this PutBlockHandler) ServeHTTP(resp http.ResponseWriter, req *http.Reques
        }
 
        // Now try to put the block through
-       var replicas int
-       var put_err error
-       if hash == "" {
+       if locatorIn == "" {
-               if bytes, err := ioutil.ReadAll(req.Body); err != nil {
-                       msg := fmt.Sprintf("Error reading request body: %s", err)
-                       log.Printf(msg)
-                       http.Error(resp, msg, http.StatusInternalServerError)
-                       return
-               } else {
-                       hash, replicas, put_err = kc.PutB(bytes)
-               }
+               // Assign to the outer err here; a ":=" in the if-statement
+               // would shadow it and leave the deferred logger and
+               // http.Error with a nil error on failure.
+               var body []byte
+               if body, err = ioutil.ReadAll(req.Body); err != nil {
+                       err = fmt.Errorf("Error reading request body: %s", err)
+                       status = http.StatusInternalServerError
+                       return
+               }
+               locatorOut, wroteReplicas, err = kc.PutB(body)
        } else {
-               hash, replicas, put_err = kc.PutHR(hash, req.Body, contentLength)
+               locatorOut, wroteReplicas, err = kc.PutHR(locatorIn, req.Body, expectLength)
        }
 
        // Tell the client how many successful PUTs we accomplished
-       resp.Header().Set(keepclient.X_Keep_Replicas_Stored, fmt.Sprintf("%d", replicas))
+       resp.Header().Set(keepclient.X_Keep_Replicas_Stored, fmt.Sprintf("%d", wroteReplicas))
 
-       switch put_err {
+       switch err {
        case nil:
-               // Default will return http.StatusOK
-               log.Printf("%s: %s %s finished, stored %v replicas (desired %v)", GetRemoteAddress(req), req.Method, hash, replicas, kc.Want_replicas)
-               n, err2 := io.WriteString(resp, hash)
-               if err2 != nil {
-                       log.Printf("%s: wrote %v bytes to response body and got error %v", n, err2.Error())
-               }
+               status = http.StatusOK
+               _, err = io.WriteString(resp, locatorOut)
 
        case keepclient.OversizeBlockError:
                // Too much data
-               http.Error(resp, fmt.Sprintf("Exceeded maximum blocksize %d", keepclient.BLOCKSIZE), http.StatusRequestEntityTooLarge)
+               status = http.StatusRequestEntityTooLarge
 
        case keepclient.InsufficientReplicasError:
-               if replicas > 0 {
+               if wroteReplicas > 0 {
                        // At least one write is considered success.  The
                        // client can decide if getting less than the number of
                        // replications it asked for is a fatal error.
-                       // Default will return http.StatusOK
-                       n, err2 := io.WriteString(resp, hash)
-                       if err2 != nil {
-                               log.Printf("%s: wrote %v bytes to response body and got error %v", n, err2.Error())
-                       }
+                       status = http.StatusOK
+                       _, err = io.WriteString(resp, locatorOut)
                } else {
-                       http.Error(resp, put_err.Error(), http.StatusServiceUnavailable)
+                       status = http.StatusServiceUnavailable
                }
 
        default:
-               http.Error(resp, put_err.Error(), http.StatusBadGateway)
-       }
-
-       if put_err != nil {
-               log.Printf("%s: %s %s stored %v replicas (desired %v) got error %v", GetRemoteAddress(req), req.Method, hash, replicas, kc.Want_replicas, put_err.Error())
+               status = http.StatusBadGateway
        }
-
 }
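
The handlers in this file now funnel every exit path through a deferred reporter that reads local status and err variables; the body only assigns and returns. The variable declarations and the deferred closure sit earlier in ServeHTTP, outside this excerpt, so the following is a minimal self-contained sketch of the pattern (all names illustrative, not copied from keepproxy.go):

    package main

    import (
        "errors"
        "log"
        "net/http"
    )

    type putHandler struct{}

    func (h putHandler) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
        var err error
        status := http.StatusInternalServerError
        defer func() {
            // Single exit point: every "return" in the body lands here,
            // so the log line and the error response are emitted
            // exactly once.
            log.Println(req.Method, req.URL.Path, status, err)
            if err != nil {
                http.Error(resp, err.Error(), status)
            }
        }()
        // ... the body assigns status and err, then returns ...
        err = errors.New("not implemented in this sketch")
    }
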
index e3b4e36b63de23dee806a35a9c9d55958cbc3afd..5bd832b511a59c5367af117f7d76acd1b84a1f66 100644 (file)
@@ -115,12 +115,16 @@ func runProxy(c *C, args []string, port int, bogusClientToken bool) keepclient.K
                Using_proxy:   true,
                Client:        &http.Client{},
        }
-       kc.SetServiceRoots(map[string]string{
+       locals := map[string]string{
                "proxy": fmt.Sprintf("http://localhost:%v", port),
-       })
+       }
+       writableLocals := map[string]string{
+               "proxy": fmt.Sprintf("http://localhost:%v", port),
+       }
+       kc.SetServiceRoots(locals, writableLocals, nil)
        c.Check(kc.Using_proxy, Equals, true)
-       c.Check(len(kc.ServiceRoots()), Equals, 1)
-       for _, root := range kc.ServiceRoots() {
+       c.Check(len(kc.LocalRoots()), Equals, 1)
+       for _, root := range kc.LocalRoots() {
                c.Check(root, Equals, fmt.Sprintf("http://localhost:%v", port))
        }
        log.Print("keepclient created")
@@ -154,8 +158,8 @@ func (s *ServerRequiredSuite) TestPutAskGet(c *C) {
        c.Assert(err, Equals, nil)
        c.Check(kc.Arvados.External, Equals, true)
        c.Check(kc.Using_proxy, Equals, true)
-       c.Check(len(kc.ServiceRoots()), Equals, 1)
-       for _, root := range kc.ServiceRoots() {
+       c.Check(len(kc.LocalRoots()), Equals, 1)
+       for _, root := range kc.LocalRoots() {
                c.Check(root, Equals, "http://localhost:29950")
        }
        os.Setenv("ARVADOS_EXTERNAL_CLIENT", "")
@@ -386,3 +390,19 @@ func (s *ServerRequiredSuite) TestPostWithoutHash(c *C) {
                        fmt.Sprintf("%x+%d", md5.Sum([]byte("qux")), 3))
        }
 }
+
+func (s *ServerRequiredSuite) TestStripHint(c *C) {
+       c.Check(removeHint.ReplaceAllString("http://keep.zzzzz.arvadosapi.com:25107/2228819a18d3727630fa30c81853d23f+67108864+A37b6ab198qqqq28d903b975266b23ee711e1852c@55635f73+K@zzzzz", "$1"),
+               Equals,
+               "http://keep.zzzzz.arvadosapi.com:25107/2228819a18d3727630fa30c81853d23f+67108864+A37b6ab198qqqq28d903b975266b23ee711e1852c@55635f73")
+       c.Check(removeHint.ReplaceAllString("http://keep.zzzzz.arvadosapi.com:25107/2228819a18d3727630fa30c81853d23f+67108864+K@zzzzz+A37b6ab198qqqq28d903b975266b23ee711e1852c@55635f73", "$1"),
+               Equals,
+               "http://keep.zzzzz.arvadosapi.com:25107/2228819a18d3727630fa30c81853d23f+67108864+A37b6ab198qqqq28d903b975266b23ee711e1852c@55635f73")
+       c.Check(removeHint.ReplaceAllString("http://keep.zzzzz.arvadosapi.com:25107/2228819a18d3727630fa30c81853d23f+67108864+A37b6ab198qqqq28d903b975266b23ee711e1852c@55635f73+K@zzzzz-zzzzz-zzzzzzzzzzzzzzz", "$1"),
+               Equals,
+               "http://keep.zzzzz.arvadosapi.com:25107/2228819a18d3727630fa30c81853d23f+67108864+A37b6ab198qqqq28d903b975266b23ee711e1852c@55635f73+K@zzzzz-zzzzz-zzzzzzzzzzzzzzz")
+       c.Check(removeHint.ReplaceAllString("http://keep.zzzzz.arvadosapi.com:25107/2228819a18d3727630fa30c81853d23f+67108864+K@zzzzz-zzzzz-zzzzzzzzzzzzzzz+A37b6ab198qqqq28d903b975266b23ee711e1852c@55635f73", "$1"),
+               Equals,
+               "http://keep.zzzzz.arvadosapi.com:25107/2228819a18d3727630fa30c81853d23f+67108864+K@zzzzz-zzzzz-zzzzzzzzzzzzzzz+A37b6ab198qqqq28d903b975266b23ee711e1852c@55635f73")
+
+}
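
removeHint itself is defined in keepproxy.go, outside this diff. Judging from the expectations above (a bare five-character +K@site hint is stripped wherever it appears, while the longer +K@uuid form survives), a pattern with the tested behavior might look like this reconstruction, not the committed definition:

    package main

    import (
        "fmt"
        "regexp"
    )

    // Strip "+K@" plus a bare five-character site ID; "$1" keeps the
    // "+" (or end of string) that followed the hint, so the longer
    // "+K@uuid" hints fail the match and survive untouched.
    var removeHint = regexp.MustCompile(`\+K@[a-z0-9]{5}(\+|$)`)

    func main() {
        loc := "acbd18db4cc2f85cedef654fccc4a4d8+3+K@zzzzz+Aabcdef@12345678"
        fmt.Println(removeHint.ReplaceAllString(loc, "$1"))
        // Output: acbd18db4cc2f85cedef654fccc4a4d8+3+Aabcdef@12345678
    }
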
diff --git a/services/keepstore/.gitignore b/services/keepstore/.gitignore
new file mode 100644 (file)
index 0000000..c195c4a
--- /dev/null
@@ -0,0 +1 @@
+keepstore
diff --git a/services/keepstore/bufferpool.go b/services/keepstore/bufferpool.go
new file mode 100644 (file)
index 0000000..9a35094
--- /dev/null
@@ -0,0 +1,63 @@
+package main
+
+import (
+       "log"
+       "sync"
+       "sync/atomic"
+       "time"
+)
+
+type bufferPool struct {
+       // limiter has a "true" placeholder for each in-use buffer.
+       limiter chan bool
+       // allocated is the number of bytes currently allocated to buffers.
+       allocated uint64
+       // Pool has unused buffers.
+       sync.Pool
+}
+
+func newBufferPool(count int, bufSize int) *bufferPool {
+       p := bufferPool{}
+       p.New = func() interface{} {
+               atomic.AddUint64(&p.allocated, uint64(bufSize))
+               return make([]byte, bufSize)
+       }
+       p.limiter = make(chan bool, count)
+       return &p
+}
+
+func (p *bufferPool) Get(size int) []byte {
+       select {
+       case p.limiter <- true:
+       default:
+               t0 := time.Now()
+               log.Printf("reached max buffers (%d), waiting", cap(p.limiter))
+               p.limiter <- true
+               log.Printf("waited %v for a buffer", time.Since(t0))
+       }
+       buf := p.Pool.Get().([]byte)
+       if cap(buf) < size {
+               log.Fatalf("bufferPool Get(size=%d) but max=%d", size, cap(buf))
+       }
+       return buf[:size]
+}
+
+func (p *bufferPool) Put(buf []byte) {
+       p.Pool.Put(buf)
+       <-p.limiter
+}
+
+// Alloc returns the number of bytes allocated to buffers.
+func (p *bufferPool) Alloc() uint64 {
+       return atomic.LoadUint64(&p.allocated)
+}
+
+// Cap returns the maximum number of buffers allowed.
+func (p *bufferPool) Cap() int {
+       return cap(p.limiter)
+}
+
+// Len returns the number of buffers in use right now.
+func (p *bufferPool) Len() int {
+       return len(p.limiter)
+}
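
The contract in short: Get blocks once all count buffers are checked out, and every Get must be matched by exactly one Put, or a limiter slot is lost until the process restarts. A small usage sketch; demoBufferPool is a hypothetical helper assumed to compile in this same package (in keepstore proper the pool is the global bufs):

    // demoBufferPool exercises the Get/Put contract on a tiny pool.
    func demoBufferPool() {
        pool := newBufferPool(2, 1<<20) // at most two 1 MiB buffers resident
        buf := pool.Get(512)            // blocks if both buffers are in use
        defer pool.Put(buf)             // exactly one Put per Get, always
        copy(buf, []byte("hello"))
        log.Printf("len=%d cap=%d, %d of %d buffers in use",
            len(buf), cap(buf), pool.Len(), pool.Cap())
    }
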
diff --git a/services/keepstore/bufferpool_test.go b/services/keepstore/bufferpool_test.go
new file mode 100644 (file)
index 0000000..95d118e
--- /dev/null
@@ -0,0 +1,92 @@
+package main
+
+import (
+       . "gopkg.in/check.v1"
+       "testing"
+       "time"
+)
+
+// Gocheck boilerplate
+func TestBufferPool(t *testing.T) {
+       TestingT(t)
+}
+
+var _ = Suite(&BufferPoolSuite{})
+
+type BufferPoolSuite struct{}
+
+// Initialize a default-sized buffer pool for the benefit of test
+// suites that don't run main().
+func init() {
+       bufs = newBufferPool(maxBuffers, BLOCKSIZE)
+}
+
+// Restore sane default after bufferpool's own tests
+func (s *BufferPoolSuite) TearDownTest(c *C) {
+       bufs = newBufferPool(maxBuffers, BLOCKSIZE)
+}
+
+func (s *BufferPoolSuite) TestBufferPoolBufSize(c *C) {
+       bufs := newBufferPool(2, 10)
+       b1 := bufs.Get(1)
+       bufs.Get(2)
+       bufs.Put(b1)
+       b3 := bufs.Get(3)
+       c.Check(len(b3), Equals, 3)
+}
+
+func (s *BufferPoolSuite) TestBufferPoolUnderLimit(c *C) {
+       bufs := newBufferPool(3, 10)
+       b1 := bufs.Get(10)
+       bufs.Get(10)
+       testBufferPoolRace(c, bufs, b1, "Get")
+}
+
+func (s *BufferPoolSuite) TestBufferPoolAtLimit(c *C) {
+       bufs := newBufferPool(2, 10)
+       b1 := bufs.Get(10)
+       bufs.Get(10)
+       testBufferPoolRace(c, bufs, b1, "Put")
+}
+
+func testBufferPoolRace(c *C, bufs *bufferPool, unused []byte, expectWin string) {
+       race := make(chan string)
+       go func() {
+               bufs.Get(10)
+               time.Sleep(time.Millisecond)
+               race <- "Get"
+       }()
+       go func() {
+               time.Sleep(10 * time.Millisecond)
+               bufs.Put(unused)
+               race <- "Put"
+       }()
+       c.Check(<-race, Equals, expectWin)
+       c.Check(<-race, Not(Equals), expectWin)
+       close(race)
+}
+
+func (s *BufferPoolSuite) TestBufferPoolReuse(c *C) {
+       bufs := newBufferPool(2, 10)
+       bufs.Get(10)
+       last := bufs.Get(10)
+       // The buffer pool is allowed to throw away unused buffers
+       // (e.g., during sync.Pool's garbage collection hook, in the
+       // current implementation). However, if unused buffers are
+       // getting thrown away and reallocated more than {arbitrary
+       // frequency threshold} during a busy loop, it's not acting
+       // much like a buffer pool.
+       allocs := 1000
+       reuses := 0
+       for i := 0; i < allocs; i++ {
+               bufs.Put(last)
+               next := bufs.Get(10)
+               copy(last, []byte("last"))
+               copy(next, []byte("next"))
+               if last[0] == 'n' {
+                       reuses++
+               }
+               last = next
+       }
+       c.Check(reuses > allocs*95/100, Equals, true)
+}
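
The loose 95% threshold is there because sync.Pool is allowed to discard idle buffers whenever the garbage collector runs, so occasional reallocation is expected. A standalone illustration (retention details vary by Go version, hence the hedged comments):

    package main

    import (
        "fmt"
        "runtime"
        "sync"
    )

    func main() {
        allocs := 0
        pool := sync.Pool{New: func() interface{} {
            allocs++
            return make([]byte, 8)
        }}
        b := pool.Get().([]byte) // first Get allocates: allocs == 1
        pool.Put(b)
        b = pool.Get().([]byte) // served from the pool: allocs still 1
        pool.Put(b)
        runtime.GC() // the pool may drop its idle buffer here
        pool.Get()   // if it did, this Get allocates again
        fmt.Println("allocations:", allocs)
    }
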
index 05b410c97dd28c74cdb5e4639c15572497975a89..8be471025db5119fb25510d2305fdaaa5cbb7257 100644 (file)
@@ -43,9 +43,9 @@ func TestGetHandler(t *testing.T) {
 
        // Prepare two test Keep volumes. Our block is stored on the second volume.
        KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Quit()
+       defer KeepVM.Close()
 
-       vols := KeepVM.Volumes()
+       vols := KeepVM.AllWritable()
        if err := vols[0].Put(TEST_HASH, TEST_BLOCK); err != nil {
                t.Error(err)
        }
@@ -54,11 +54,11 @@ func TestGetHandler(t *testing.T) {
        // Turn on permission settings so we can generate signed locators.
        enforce_permissions = true
        PermissionSecret = []byte(known_key)
-       permission_ttl = time.Duration(300) * time.Second
+       blob_signature_ttl = 300 * time.Second
 
        var (
                unsigned_locator  = "/" + TEST_HASH
-               valid_timestamp   = time.Now().Add(permission_ttl)
+               valid_timestamp   = time.Now().Add(blob_signature_ttl)
                expired_timestamp = time.Now().Add(-time.Hour)
                signed_locator    = "/" + SignLocator(TEST_HASH, known_token, valid_timestamp)
                expired_locator   = "/" + SignLocator(TEST_HASH, known_token, expired_timestamp)
@@ -151,7 +151,7 @@ func TestPutHandler(t *testing.T) {
 
        // Prepare two test Keep volumes.
        KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Quit()
+       defer KeepVM.Close()
 
        // --------------
        // No server key.
@@ -176,7 +176,7 @@ func TestPutHandler(t *testing.T) {
        // With a server key.
 
        PermissionSecret = []byte(known_key)
-       permission_ttl = time.Duration(300) * time.Second
+       blob_signature_ttl = 300 * time.Second
 
        // When a permission key is available, the locator returned
        // from an authenticated PUT request will be signed.
@@ -195,7 +195,7 @@ func TestPutHandler(t *testing.T) {
                "Authenticated PUT, signed locator, with server key",
                http.StatusOK, response)
        response_locator := strings.TrimSpace(response.Body.String())
-       if !VerifySignature(response_locator, known_token) {
+       if VerifySignature(response_locator, known_token) != nil {
                t.Errorf("Authenticated PUT, signed locator, with server key:\n"+
                        "response '%s' does not contain a valid signature",
                        response_locator)
@@ -218,6 +218,47 @@ func TestPutHandler(t *testing.T) {
                TEST_HASH_PUT_RESPONSE, response)
 }
 
+func TestPutAndDeleteSkipReadonlyVolumes(t *testing.T) {
+       defer teardown()
+       data_manager_token = "fake-data-manager-token"
+       vols := []*MockVolume{CreateMockVolume(), CreateMockVolume()}
+       vols[0].Readonly = true
+       KeepVM = MakeRRVolumeManager([]Volume{vols[0], vols[1]})
+       defer KeepVM.Close()
+       IssueRequest(
+               &RequestTester{
+                       method:       "PUT",
+                       uri:          "/" + TEST_HASH,
+                       request_body: TEST_BLOCK,
+               })
+       never_delete = false
+       IssueRequest(
+               &RequestTester{
+                       method:       "DELETE",
+                       uri:          "/" + TEST_HASH,
+                       request_body: TEST_BLOCK,
+                       api_token:    data_manager_token,
+               })
+       type expect struct {
+               volnum    int
+               method    string
+               callcount int
+       }
+       for _, e := range []expect{
+               {0, "Get", 0},
+               {0, "Touch", 0},
+               {0, "Put", 0},
+               {0, "Delete", 0},
+               {1, "Get", 1},
+               {1, "Put", 1},
+               {1, "Delete", 1},
+       } {
+               if calls := vols[e.volnum].CallCount(e.method); calls != e.callcount {
+                       t.Errorf("Got %d %s() on vol %d, expect %d", calls, e.method, e.volnum, e.callcount)
+               }
+       }
+}
+
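
MockVolume, CreateMockVolume, and CallCount are defined in keepstore's volume test support code, outside this diff; the test above needs only per-method call counting plus a Readonly switch. A compatible sketch, with names inferred from the usage here (the real type implements the full Volume interface):

    type MockVolume struct {
        Readonly bool
        calls    map[string]int
    }

    func CreateMockVolume() *MockVolume {
        return &MockVolume{calls: make(map[string]int)}
    }

    func (v *MockVolume) CallCount(method string) int {
        return v.calls[method]
    }

    func (v *MockVolume) Put(loc string, block []byte) error {
        v.calls["Put"]++
        if v.Readonly {
            return MethodDisabledError
        }
        // ... store the block in memory ...
        return nil
    }
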
 // Test /index requests:
 //   - unauthenticated /index request
 //   - unauthenticated /index/prefix request
@@ -236,9 +277,9 @@ func TestIndexHandler(t *testing.T) {
        // Include multiple blocks on different volumes, and
        // some metadata files (which should be omitted from index listings)
        KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Quit()
+       defer KeepVM.Close()
 
-       vols := KeepVM.Volumes()
+       vols := KeepVM.AllWritable()
        vols[0].Put(TEST_HASH, TEST_BLOCK)
        vols[1].Put(TEST_HASH_2, TEST_BLOCK_2)
        vols[0].Put(TEST_HASH+".meta", []byte("metadata"))
@@ -337,7 +378,7 @@ func TestIndexHandler(t *testing.T) {
                response)
 
        expected := `^` + TEST_HASH + `\+\d+ \d+\n` +
-               TEST_HASH_2 + `\+\d+ \d+\n$`
+               TEST_HASH_2 + `\+\d+ \d+\n\n$`
        match, _ := regexp.MatchString(expected, response.Body.String())
        if !match {
                t.Errorf(
@@ -353,7 +394,7 @@ func TestIndexHandler(t *testing.T) {
                http.StatusOK,
                response)
 
-       expected = `^` + TEST_HASH + `\+\d+ \d+\n$`
+       expected = `^` + TEST_HASH + `\+\d+ \d+\n\n$`
        match, _ = regexp.MatchString(expected, response.Body.String())
        if !match {
                t.Errorf(
@@ -395,19 +436,21 @@ func TestDeleteHandler(t *testing.T) {
        // Include multiple blocks on different volumes, and
        // some metadata files (which should be omitted from index listings)
        KeepVM = MakeTestVolumeManager(2)
-       defer KeepVM.Quit()
+       defer KeepVM.Close()
 
-       vols := KeepVM.Volumes()
+       vols := KeepVM.AllWritable()
        vols[0].Put(TEST_HASH, TEST_BLOCK)
 
-       // Explicitly set the permission_ttl to 0 for these
+       // Explicitly set the blob_signature_ttl to 0 for these
        // tests, to ensure the MockVolume deletes the blocks
        // even though they have just been created.
-       permission_ttl = time.Duration(0)
+       blob_signature_ttl = time.Duration(0)
 
        var user_token = "NOT DATA MANAGER TOKEN"
        data_manager_token = "DATA MANAGER TOKEN"
 
+       never_delete = false
+
        unauth_req := &RequestTester{
                method: "DELETE",
                uri:    "/" + TEST_HASH,
@@ -488,10 +531,10 @@ func TestDeleteHandler(t *testing.T) {
                t.Error("superuser_existing_block_req: block not deleted")
        }
 
-       // A DELETE request on a block newer than permission_ttl should return
-       // success but leave the block on the volume.
+       // A DELETE request on a block newer than blob_signature_ttl
+       // should return success but leave the block on the volume.
        vols[0].Put(TEST_HASH, TEST_BLOCK)
-       permission_ttl = time.Duration(1) * time.Hour
+       blob_signature_ttl = time.Hour
 
        response = IssueRequest(superuser_existing_block_req)
        ExpectStatusCode(t,
@@ -545,6 +588,8 @@ func TestPullHandler(t *testing.T) {
        var user_token = "USER TOKEN"
        data_manager_token = "DATA MANAGER TOKEN"
 
+       pullq = NewWorkQueue()
+
        good_json := []byte(`[
                {
                        "locator":"locator_with_two_servers",
@@ -594,7 +639,7 @@ func TestPullHandler(t *testing.T) {
                        "Invalid pull request from the data manager",
                        RequestTester{"/pull", data_manager_token, "PUT", bad_json},
                        http.StatusBadRequest,
-                       "Bad Request\n",
+                       "",
                },
        }
 
@@ -649,6 +694,8 @@ func TestTrashHandler(t *testing.T) {
        var user_token = "USER TOKEN"
        data_manager_token = "DATA MANAGER TOKEN"
 
+       trashq = NewWorkQueue()
+
        good_json := []byte(`[
                {
                        "locator":"block1",
@@ -696,7 +743,7 @@ func TestTrashHandler(t *testing.T) {
                        "Invalid trash list from the data manager",
                        RequestTester{"/trash", data_manager_token, "PUT", bad_json},
                        http.StatusBadRequest,
-                       "Bad Request\n",
+                       "",
                },
        }
 
@@ -744,7 +791,7 @@ func ExpectStatusCode(
        expected_status int,
        response *httptest.ResponseRecorder) {
        if response.Code != expected_status {
-               t.Errorf("%s: expected status %s, got %+v",
+               t.Errorf("%s: expected status %d, got %+v",
                        testname, expected_status, response)
        }
 }
@@ -754,8 +801,87 @@ func ExpectBody(
        testname string,
        expected_body string,
        response *httptest.ResponseRecorder) {
-       if response.Body.String() != expected_body {
+       if expected_body != "" && response.Body.String() != expected_body {
                t.Errorf("%s: expected response body '%s', got %+v",
                        testname, expected_body, response)
        }
 }
+
+// Invoke the PutBlockHandler a bunch of times to test for bufferpool resource
+// leak.
+func TestPutHandlerNoBufferleak(t *testing.T) {
+       defer teardown()
+
+       // Prepare two test Keep volumes.
+       KeepVM = MakeTestVolumeManager(2)
+       defer KeepVM.Close()
+
+       ok := make(chan bool)
+       go func() {
+               for i := 0; i < maxBuffers+1; i++ {
+                       // Unauthenticated request, no server key
+                       // => OK (unsigned response)
+                       unsigned_locator := "/" + TEST_HASH
+                       response := IssueRequest(
+                               &RequestTester{
+                                       method:       "PUT",
+                                       uri:          unsigned_locator,
+                                       request_body: TEST_BLOCK,
+                               })
+                       ExpectStatusCode(t,
+                               "TestPutHandlerBufferleak", http.StatusOK, response)
+                       ExpectBody(t,
+                               "TestPutHandlerBufferleak",
+                               TEST_HASH_PUT_RESPONSE, response)
+               }
+               ok <- true
+       }()
+       select {
+       case <-time.After(20 * time.Second):
+               // If the buffer pool leaks, the test goroutine hangs.
+               t.Fatal("test did not finish, assuming pool leaked")
+       case <-ok:
+       }
+}
+
+// Invoke the GetBlockHandler a bunch of times to test for bufferpool resource
+// leak.
+func TestGetHandlerNoBufferleak(t *testing.T) {
+       defer teardown()
+
+       // Prepare two test Keep volumes. Our block is stored on the second volume.
+       KeepVM = MakeTestVolumeManager(2)
+       defer KeepVM.Close()
+
+       vols := KeepVM.AllWritable()
+       if err := vols[0].Put(TEST_HASH, TEST_BLOCK); err != nil {
+               t.Error(err)
+       }
+
+       ok := make(chan bool)
+       go func() {
+               for i := 0; i < maxBuffers+1; i++ {
+                       // Unauthenticated request, unsigned locator
+                       // => OK
+                       unsigned_locator := "/" + TEST_HASH
+                       response := IssueRequest(
+                               &RequestTester{
+                                       method: "GET",
+                                       uri:    unsigned_locator,
+                               })
+                       ExpectStatusCode(t,
+                               "Unauthenticated request, unsigned locator", http.StatusOK, response)
+                       ExpectBody(t,
+                               "Unauthenticated request, unsigned locator",
+                               string(TEST_BLOCK),
+                               response)
+               }
+               ok <- true
+       }()
+       select {
+       case <-time.After(20 * time.Second):
+               // If the buffer pool leaks, the test goroutine hangs.
+               t.Fatal("test did not finish, assuming pool leaked")
+       case <-ok:
+       }
+}
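
RequestTester and IssueRequest are helpers defined earlier in handler_test.go, outside this excerpt. A minimal reconstruction with the fields these tests rely on (field order matters, because positional literals like RequestTester{"/pull", token, "PUT", body} appear above); it assumes the usual bytes, net/http, and net/http/httptest imports:

    type RequestTester struct {
        uri          string
        api_token    string
        method       string
        request_body []byte
    }

    func IssueRequest(rt *RequestTester) *httptest.ResponseRecorder {
        response := httptest.NewRecorder()
        body := bytes.NewReader(rt.request_body)
        req, _ := http.NewRequest(rt.method, rt.uri, body)
        if rt.api_token != "" {
            req.Header.Set("Authorization", "OAuth2 "+rt.api_token)
        }
        // Drive the request through the same router the server uses.
        MakeRESTRouter().ServeHTTP(response, req)
        return response
    }
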
index c7559a1bee313783372cfa9e88f918618459dae5..a86bb6a5b552887836e24cb858191bcbe920e479 100644 (file)
@@ -8,7 +8,6 @@ package main
 // StatusHandler   (GET /status.json)
 
 import (
-       "bufio"
        "bytes"
        "container/list"
        "crypto/md5"
@@ -22,8 +21,7 @@ import (
        "regexp"
        "runtime"
        "strconv"
-       "strings"
-       "syscall"
+       "sync"
        "time"
 )
 
@@ -41,35 +39,19 @@ func MakeRESTRouter() *mux.Router {
 
        rest.HandleFunc(`/{hash:[0-9a-f]{32}}`, PutBlockHandler).Methods("PUT")
        rest.HandleFunc(`/{hash:[0-9a-f]{32}}`, DeleteHandler).Methods("DELETE")
-
-       // For IndexHandler we support:
-       //   /index           - returns all locators
-       //   /index/{prefix}  - returns all locators that begin with {prefix}
-       //      {prefix} is a string of hexadecimal digits between 0 and 32 digits.
-       //      If {prefix} is the empty string, return an index of all locators
-       //      (so /index and /index/ behave identically)
-       //      A client may supply a full 32-digit locator string, in which
-       //      case the server will return an index with either zero or one
-       //      entries. This usage allows a client to check whether a block is
-       //      present, and its size and upload time, without retrieving the
-       //      entire block.
-       //
+       // List all blocks stored here. Privileged client only.
        rest.HandleFunc(`/index`, IndexHandler).Methods("GET", "HEAD")
-       rest.HandleFunc(
-               `/index/{prefix:[0-9a-f]{0,32}}`, IndexHandler).Methods("GET", "HEAD")
+       // List blocks stored here whose hash has the given prefix.
+       // Privileged client only.
+       rest.HandleFunc(`/index/{prefix:[0-9a-f]{0,32}}`, IndexHandler).Methods("GET", "HEAD")
+
+       // List volumes: path, device number, bytes used/avail.
        rest.HandleFunc(`/status.json`, StatusHandler).Methods("GET", "HEAD")
 
-       // The PullHandler and TrashHandler process "PUT /pull" and "PUT
-       // /trash" requests from Data Manager.  These requests instruct
-       // Keep to replicate or delete blocks; see
-       // https://arvados.org/projects/arvados/wiki/Keep_Design_Doc
-       // for more details.
-       //
-       // Each handler parses the JSON list of block management requests
-       // in the message body, and replaces any existing pull queue or
-       // trash queue with their contentes.
-       //
+       // Replace the current pull queue.
        rest.HandleFunc(`/pull`, PullHandler).Methods("PUT")
+
+       // Replace the current trash queue.
        rest.HandleFunc(`/trash`, TrashHandler).Methods("PUT")
 
        // Any request which does not match any of these routes gets
@@ -83,143 +65,77 @@ func BadRequestHandler(w http.ResponseWriter, r *http.Request) {
        http.Error(w, BadRequestError.Error(), BadRequestError.HTTPCode)
 }
 
-// FindKeepVolumes scans all mounted volumes on the system for Keep
-// volumes, and returns a list of matching paths.
-//
-// A device is assumed to be a Keep volume if it is a normal or tmpfs
-// volume and has a "/keep" directory directly underneath the mount
-// point.
-//
-func FindKeepVolumes() []string {
-       vols := make([]string, 0)
-
-       if f, err := os.Open(PROC_MOUNTS); err != nil {
-               log.Fatalf("opening %s: %s\n", PROC_MOUNTS, err)
-       } else {
-               scanner := bufio.NewScanner(f)
-               for scanner.Scan() {
-                       args := strings.Fields(scanner.Text())
-                       dev, mount := args[0], args[1]
-                       if mount != "/" &&
-                               (dev == "tmpfs" || strings.HasPrefix(dev, "/dev/")) {
-                               keep := mount + "/keep"
-                               if st, err := os.Stat(keep); err == nil && st.IsDir() {
-                                       vols = append(vols, keep)
-                               }
-                       }
-               }
-               if err := scanner.Err(); err != nil {
-                       log.Fatal(err)
-               }
-       }
-       return vols
-}
-
 func GetBlockHandler(resp http.ResponseWriter, req *http.Request) {
-       hash := mux.Vars(req)["hash"]
-
-       hints := mux.Vars(req)["hints"]
-
-       // Parse the locator string and hints from the request.
-       // TODO(twp): implement a Locator type.
-       var signature, timestamp string
-       if hints != "" {
-               signature_pat, _ := regexp.Compile("^A([[:xdigit:]]+)@([[:xdigit:]]{8})$")
-               for _, hint := range strings.Split(hints, "+") {
-                       if match, _ := regexp.MatchString("^[[:digit:]]+$", hint); match {
-                               // Server ignores size hints
-                       } else if m := signature_pat.FindStringSubmatch(hint); m != nil {
-                               signature = m[1]
-                               timestamp = m[2]
-                       } else if match, _ := regexp.MatchString("^[[:upper:]]", hint); match {
-                               // Any unknown hint that starts with an uppercase letter is
-                               // presumed to be valid and ignored, to permit forward compatibility.
-                       } else {
-                               // Unknown format; not a valid locator.
-                               http.Error(resp, BadRequestError.Error(), BadRequestError.HTTPCode)
-                               return
-                       }
-               }
-       }
-
-       // If permission checking is in effect, verify this
-       // request's permission signature.
        if enforce_permissions {
-               if signature == "" || timestamp == "" {
-                       http.Error(resp, PermissionError.Error(), PermissionError.HTTPCode)
-                       return
-               } else if IsExpired(timestamp) {
-                       http.Error(resp, ExpiredError.Error(), ExpiredError.HTTPCode)
+               locator := req.URL.Path[1:] // strip leading slash
+               if err := VerifySignature(locator, GetApiToken(req)); err != nil {
+                       http.Error(resp, err.Error(), err.(*KeepError).HTTPCode)
                        return
-               } else {
-                       req_locator := req.URL.Path[1:] // strip leading slash
-                       if !VerifySignature(req_locator, GetApiToken(req)) {
-                               http.Error(resp, PermissionError.Error(), PermissionError.HTTPCode)
-                               return
-                       }
                }
        }
 
-       block, err := GetBlock(hash, false)
-
-       // Garbage collect after each GET. Fixes #2865.
-       // TODO(twp): review Keep memory usage and see if there's
-       // a better way to do this than blindly garbage collecting
-       // after every block.
-       defer runtime.GC()
-
+       block, err := GetBlock(mux.Vars(req)["hash"], false)
        if err != nil {
                // This type assertion is safe because the only errors
                // GetBlock can return are DiskHashError or NotFoundError.
                http.Error(resp, err.Error(), err.(*KeepError).HTTPCode)
                return
        }
+       defer bufs.Put(block)
 
-       resp.Header().Set("Content-Length", fmt.Sprintf("%d", len(block)))
-
-       _, err = resp.Write(block)
-
-       return
+       resp.Header().Set("Content-Length", strconv.Itoa(len(block)))
+       resp.Header().Set("Content-Type", "application/octet-stream")
+       resp.Write(block)
 }
 
 func PutBlockHandler(resp http.ResponseWriter, req *http.Request) {
-       // Garbage collect after each PUT. Fixes #2865.
-       // See also GetBlockHandler.
-       defer runtime.GC()
-
        hash := mux.Vars(req)["hash"]
 
-       // Read the block data to be stored.
-       // If the request exceeds BLOCKSIZE bytes, issue a HTTP 500 error.
-       //
+       // Detect as many error conditions as possible before reading
+       // the body: avoid transmitting data that will not end up
+       // being written anyway.
+
+       if req.ContentLength == -1 {
+               http.Error(resp, SizeRequiredError.Error(), SizeRequiredError.HTTPCode)
+               return
+       }
+
        if req.ContentLength > BLOCKSIZE {
                http.Error(resp, TooLongError.Error(), TooLongError.HTTPCode)
                return
        }
 
-       buf := make([]byte, req.ContentLength)
-       nread, err := io.ReadFull(req.Body, buf)
+       if len(KeepVM.AllWritable()) == 0 {
+               http.Error(resp, FullError.Error(), FullError.HTTPCode)
+               return
+       }
+
+       buf := bufs.Get(int(req.ContentLength))
+       _, err := io.ReadFull(req.Body, buf)
        if err != nil {
                http.Error(resp, err.Error(), 500)
-       } else if int64(nread) < req.ContentLength {
-               http.Error(resp, "request truncated", 500)
-       } else {
-               if err := PutBlock(buf, hash); err == nil {
-                       // Success; add a size hint, sign the locator if
-                       // possible, and return it to the client.
-                       return_hash := fmt.Sprintf("%s+%d", hash, len(buf))
-                       api_token := GetApiToken(req)
-                       if PermissionSecret != nil && api_token != "" {
-                               expiry := time.Now().Add(permission_ttl)
-                               return_hash = SignLocator(return_hash, api_token, expiry)
-                       }
-                       resp.Write([]byte(return_hash + "\n"))
-               } else {
-                       ke := err.(*KeepError)
-                       http.Error(resp, ke.Error(), ke.HTTPCode)
-               }
+               bufs.Put(buf)
+               return
+       }
+
+       err = PutBlock(buf, hash)
+       bufs.Put(buf)
+
+       if err != nil {
+               ke := err.(*KeepError)
+               http.Error(resp, ke.Error(), ke.HTTPCode)
+               return
        }
-       return
+
+       // Success; add a size hint, sign the locator if possible, and
+       // return it to the client.
+       return_hash := fmt.Sprintf("%s+%d", hash, req.ContentLength)
+       api_token := GetApiToken(req)
+       if PermissionSecret != nil && api_token != "" {
+               expiry := time.Now().Add(blob_signature_ttl)
+               return_hash = SignLocator(return_hash, api_token, expiry)
+       }
+       resp.Write([]byte(return_hash + "\n"))
 }
 
 // IndexHandler
@@ -234,11 +150,21 @@ func IndexHandler(resp http.ResponseWriter, req *http.Request) {
 
        prefix := mux.Vars(req)["prefix"]
 
-       var index string
-       for _, vol := range KeepVM.Volumes() {
-               index = index + vol.Index(prefix)
+       for _, vol := range KeepVM.AllReadable() {
+               if err := vol.IndexTo(prefix, resp); err != nil {
+                       // The only errors returned by IndexTo are
+                       // write errors returned by resp.Write(),
+                       // which probably means the client has
+                       // disconnected and this error will never be
+                       // reported to the client -- but it will
+                       // appear in our own error log.
+                       http.Error(resp, err.Error(), http.StatusInternalServerError)
+                       return
+               }
        }
-       resp.Write([]byte(index))
+       // An empty line at EOF is the only way the client can be
+       // assured the entire index was received.
+       resp.Write([]byte{'\n'})
 }
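
Because IndexTo streams each volume's listing straight into the response, a dropped connection would otherwise be indistinguishable from a short index; the trailing blank line gives clients a positive completeness check. A hypothetical client-side reader (not part of this diff; assumes bufio, errors, and io imports):

    func readIndex(r io.Reader) ([]string, error) {
        scanner := bufio.NewScanner(r)
        var lines []string
        for scanner.Scan() {
            if scanner.Text() == "" {
                // The blank line marks a complete index.
                return lines, nil
            }
            lines = append(lines, scanner.Text())
        }
        if err := scanner.Err(); err != nil {
            return nil, err
        }
        return nil, errors.New("index truncated: no trailing blank line")
    }
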
 
 // StatusHandler
@@ -260,60 +186,66 @@ type VolumeStatus struct {
        BytesUsed  uint64 `json:"bytes_used"`
 }
 
+type PoolStatus struct {
+       Alloc uint64 `json:"BytesAllocated"`
+       Cap   int    `json:"BuffersMax"`
+       Len   int    `json:"BuffersInUse"`
+}
+
 type NodeStatus struct {
-       Volumes []*VolumeStatus `json:"volumes"`
+       Volumes    []*VolumeStatus `json:"volumes"`
+       BufferPool PoolStatus
+       PullQueue  WorkQueueStatus
+       TrashQueue WorkQueueStatus
+       Memory     runtime.MemStats
 }
 
+var st NodeStatus
+var stLock sync.Mutex
+
 func StatusHandler(resp http.ResponseWriter, req *http.Request) {
-       st := GetNodeStatus()
-       if jstat, err := json.Marshal(st); err == nil {
+       stLock.Lock()
+       readNodeStatus(&st)
+       jstat, err := json.Marshal(&st)
+       stLock.Unlock()
+       if err == nil {
                resp.Write(jstat)
        } else {
                log.Printf("json.Marshal: %s\n", err)
-               log.Printf("NodeStatus = %v\n", st)
+               log.Printf("NodeStatus = %v\n", &st)
                http.Error(resp, err.Error(), 500)
        }
 }
 
-// GetNodeStatus
-//     Returns a NodeStatus struct describing this Keep
-//     node's current status.
-//
-func GetNodeStatus() *NodeStatus {
-       st := new(NodeStatus)
-
-       st.Volumes = make([]*VolumeStatus, len(KeepVM.Volumes()))
-       for i, vol := range KeepVM.Volumes() {
-               st.Volumes[i] = vol.Status()
+// readNodeStatus populates the given NodeStatus struct with current values.
+func readNodeStatus(st *NodeStatus) {
+       vols := KeepVM.AllReadable()
+       if cap(st.Volumes) < len(vols) {
+               st.Volumes = make([]*VolumeStatus, len(vols))
+       }
+       st.Volumes = st.Volumes[:0]
+       for _, vol := range vols {
+               if s := vol.Status(); s != nil {
+                       st.Volumes = append(st.Volumes, s)
+               }
        }
-       return st
+       st.BufferPool.Alloc = bufs.Alloc()
+       st.BufferPool.Cap = bufs.Cap()
+       st.BufferPool.Len = bufs.Len()
+       st.PullQueue = getWorkQueueStatus(pullq)
+       st.TrashQueue = getWorkQueueStatus(trashq)
+       runtime.ReadMemStats(&st.Memory)
 }
 
-// GetVolumeStatus
-//     Returns a VolumeStatus describing the requested volume.
-//
-func GetVolumeStatus(volume string) *VolumeStatus {
-       var fs syscall.Statfs_t
-       var devnum uint64
-
-       if fi, err := os.Stat(volume); err == nil {
-               devnum = fi.Sys().(*syscall.Stat_t).Dev
-       } else {
-               log.Printf("GetVolumeStatus: os.Stat: %s\n", err)
-               return nil
+// getWorkQueueStatus returns a WorkQueueStatus for the given queue.
+// If q is nil (which should never happen except in test suites),
+// return a zero status value instead of crashing.
+func getWorkQueueStatus(q *WorkQueue) WorkQueueStatus {
+       if q == nil {
+               // This should only happen during tests.
+               return WorkQueueStatus{}
        }
-
-       err := syscall.Statfs(volume, &fs)
-       if err != nil {
-               log.Printf("GetVolumeStatus: statfs: %s\n", err)
-               return nil
-       }
-       // These calculations match the way df calculates disk usage:
-       // "free" space is measured by fs.Bavail, but "used" space
-       // uses fs.Blocks - fs.Bfree.
-       free := fs.Bavail * uint64(fs.Bsize)
-       used := (fs.Blocks - fs.Bfree) * uint64(fs.Bsize)
-       return &VolumeStatus{volume, devnum, free, used}
+       return q.Status()
 }
 
 // DeleteHandler processes DELETE requests.
@@ -358,14 +290,14 @@ func DeleteHandler(resp http.ResponseWriter, req *http.Request) {
                return
        }
 
-       // Delete copies of this block from all available volumes.  Report
-       // how many blocks were successfully and unsuccessfully
-       // deleted.
+       // Delete copies of this block from all available volumes.
+       // Report how many blocks were successfully deleted, and how
+       // many were found on writable volumes but not deleted.
        var result struct {
                Deleted int `json:"copies_deleted"`
                Failed  int `json:"copies_failed"`
        }
-       for _, vol := range KeepVM.Volumes() {
+       for _, vol := range KeepVM.AllWritable() {
                if err := vol.Delete(hash); err == nil {
                        result.Deleted++
                } else if os.IsNotExist(err) {
@@ -445,7 +377,7 @@ func PullHandler(resp http.ResponseWriter, req *http.Request) {
        var pr []PullRequest
        r := json.NewDecoder(req.Body)
        if err := r.Decode(&pr); err != nil {
-               http.Error(resp, BadRequestError.Error(), BadRequestError.HTTPCode)
+               http.Error(resp, err.Error(), BadRequestError.HTTPCode)
                return
        }
 
@@ -460,10 +392,6 @@ func PullHandler(resp http.ResponseWriter, req *http.Request) {
        for _, p := range pr {
                plist.PushBack(p)
        }
-
-       if pullq == nil {
-               pullq = NewWorkQueue()
-       }
        pullq.ReplaceQueue(plist)
 }
 
@@ -483,7 +411,7 @@ func TrashHandler(resp http.ResponseWriter, req *http.Request) {
        var trash []TrashRequest
        r := json.NewDecoder(req.Body)
        if err := r.Decode(&trash); err != nil {
-               http.Error(resp, BadRequestError.Error(), BadRequestError.HTTPCode)
+               http.Error(resp, err.Error(), BadRequestError.HTTPCode)
                return
        }
 
@@ -498,10 +426,6 @@ func TrashHandler(resp http.ResponseWriter, req *http.Request) {
        for _, t := range trash {
                tlist.PushBack(t)
        }
-
-       if trashq == nil {
-               trashq = NewWorkQueue()
-       }
        trashq.ReplaceQueue(tlist)
 }
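
With both nil-queue fallbacks removed, creating the queues is strictly the caller's job: the test setup earlier in this diff now calls NewWorkQueue() explicitly, and main() must do the same before the router starts serving. A sketch of the required startup order (listen as parsed from the -listen flag):

    pullq = NewWorkQueue()
    trashq = NewWorkQueue()
    // Only with both queues in place is it safe to expose /pull and /trash:
    log.Fatal(http.ListenAndServe(listen, MakeRESTRouter()))
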
 
@@ -536,52 +460,56 @@ func GetBlock(hash string, update_timestamp bool) ([]byte, error) {
        // Attempt to read the requested hash from a keep volume.
        error_to_caller := NotFoundError
 
-       for _, vol := range KeepVM.Volumes() {
-               if buf, err := vol.Get(hash); err != nil {
-                       // IsNotExist is an expected error and may be ignored.
-                       // (If all volumes report IsNotExist, we return a NotFoundError)
-                       // All other errors should be logged but we continue trying to
-                       // read.
-                       switch {
-                       case os.IsNotExist(err):
-                               continue
-                       default:
+       var vols []Volume
+       if update_timestamp {
+               // Pointless to find the block on an unwritable volume
+               // because Touch() will fail -- this is as good as
+               // "not found" for purposes of callers who need to
+               // update_timestamp.
+               vols = KeepVM.AllWritable()
+       } else {
+               vols = KeepVM.AllReadable()
+       }
+
+       for _, vol := range vols {
+               buf, err := vol.Get(hash)
+               if err != nil {
+                       // IsNotExist is an expected error and may be
+                       // ignored. All other errors are logged. In
+                       // any case we continue trying to read other
+                       // volumes. If all volumes report IsNotExist,
+                       // we return a NotFoundError.
+                       if !os.IsNotExist(err) {
                                log.Printf("GetBlock: reading %s: %s\n", hash, err)
                        }
-               } else {
-                       // Double check the file checksum.
-                       //
-                       filehash := fmt.Sprintf("%x", md5.Sum(buf))
-                       if filehash != hash {
-                               // TODO(twp): this condition probably represents a bad disk and
-                               // should raise major alarm bells for an administrator: e.g.
-                               // they should be sent directly to an event manager at high
-                               // priority or logged as urgent problems.
-                               //
-                               log.Printf("%s: checksum mismatch for request %s (actual %s)\n",
-                                       vol, hash, filehash)
-                               error_to_caller = DiskHashError
-                       } else {
-                               // Success!
-                               if error_to_caller != NotFoundError {
-                                       log.Printf("%s: checksum mismatch for request %s but a good copy was found on another volume and returned\n",
-                                               vol, hash)
-                               }
-                               // Update the timestamp if the caller requested.
-                               // If we could not update the timestamp, continue looking on
-                               // other volumes.
-                               if update_timestamp {
-                                       if vol.Touch(hash) != nil {
-                                               continue
-                                       }
-                               }
-                               return buf, nil
+                       continue
+               }
+               // Check the file checksum.
+               //
+               filehash := fmt.Sprintf("%x", md5.Sum(buf))
+               if filehash != hash {
+                       // TODO: Try harder to tell a sysadmin about
+                       // this.
+                       log.Printf("%s: checksum mismatch for request %s (actual %s)\n",
+                               vol, hash, filehash)
+                       error_to_caller = DiskHashError
+                       bufs.Put(buf)
+                       continue
+               }
+               if error_to_caller == DiskHashError {
+                       log.Printf("%s: checksum mismatch for request %s but a good copy was found on another volume and returned",
+                               vol, hash)
+               }
+               if update_timestamp {
+                       if err := vol.Touch(hash); err != nil {
+                               error_to_caller = GenericError
+                               log.Printf("%s: Touch %s failed: %s",
+                                       vol, hash, error_to_caller)
+                               bufs.Put(buf)
+                               continue
                        }
                }
-       }
-
-       if error_to_caller != NotFoundError {
-               log.Printf("%s: checksum mismatch, no good copy found\n", hash)
+               return buf, nil
        }
        return nil, error_to_caller
 }
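
One consequence of the pooled buffers: a successful GetBlock now hands the caller a buffer owned by bufs, and the caller must return it, exactly as GetBlockHandler does with "defer bufs.Put(block)" and PutBlock does with "defer bufs.Put(oldblock)". A hypothetical caller:

    func useBlock(hash string) error {
        buf, err := GetBlock(hash, false)
        if err != nil {
            return err
        }
        // buf came from the shared pool; skipping this Put would
        // permanently consume one of the maxBuffers slots.
        defer bufs.Put(buf)
        // ... read buf here ...
        return nil
    }
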
@@ -628,6 +556,7 @@ func PutBlock(block []byte, hash string) error {
        // so there is nothing special to do if err != nil.
        //
        if oldblock, err := GetBlock(hash, true); err == nil {
+               defer bufs.Put(oldblock)
                if bytes.Compare(block, oldblock) == 0 {
                        // The block already exists; return success.
                        return nil
@@ -638,56 +567,61 @@ func PutBlock(block []byte, hash string) error {
 
        // Choose a Keep volume to write to.
        // If this volume fails, try all of the volumes in order.
-       vol := KeepVM.Choose()
-       if err := vol.Put(hash, block); err == nil {
-               return nil // success!
-       } else {
-               allFull := true
-               for _, vol := range KeepVM.Volumes() {
-                       err := vol.Put(hash, block)
-                       if err == nil {
-                               return nil // success!
-                       }
-                       if err != FullError {
-                               // The volume is not full but the write did not succeed.
-                               // Report the error and continue trying.
-                               allFull = false
-                               log.Printf("%s: Write(%s): %s\n", vol, hash, err)
-                       }
+       if vol := KeepVM.NextWritable(); vol != nil {
+               if err := vol.Put(hash, block); err == nil {
+                       return nil // success!
                }
+       }
 
-               if allFull {
-                       log.Printf("all Keep volumes full")
-                       return FullError
-               } else {
-                       log.Printf("all Keep volumes failed")
-                       return GenericError
+       writables := KeepVM.AllWritable()
+       if len(writables) == 0 {
+               log.Print("No writable volumes.")
+               return FullError
+       }
+
+       allFull := true
+       for _, vol := range writables {
+               err := vol.Put(hash, block)
+               if err == nil {
+                       return nil // success!
                }
+               if err != FullError {
+                       // The volume is not full but the
+                       // write did not succeed.  Report the
+                       // error and continue trying.
+                       allFull = false
+                       log.Printf("%s: Write(%s): %s\n", vol, hash, err)
+               }
+       }
+
+       if allFull {
+               log.Print("All volumes are full.")
+               return FullError
+       } else {
+               // Already logged the non-full errors.
+               return GenericError
        }
 }
 
+var validLocatorRe = regexp.MustCompile(`^[0-9a-f]{32}$`)
+
 // IsValidLocator
 //     Return true if the specified string is a valid Keep locator.
 //     When Keep is extended to support hash types other than MD5,
 //     this should be updated to cover those as well.
 //
 func IsValidLocator(loc string) bool {
-       match, err := regexp.MatchString(`^[0-9a-f]{32}$`, loc)
-       if err == nil {
-               return match
-       }
-       log.Printf("IsValidLocator: %s\n", err)
-       return false
+       return validLocatorRe.MatchString(loc)
 }
 
+var authRe = regexp.MustCompile(`^OAuth2\s+(.*)`)
+
 // GetApiToken returns the OAuth2 token from the Authorization
 // header of a HTTP request, or an empty string if no matching
 // token is found.
 func GetApiToken(req *http.Request) string {
        if auth, ok := req.Header["Authorization"]; ok {
-               if pat, err := regexp.Compile(`^OAuth2\s+(.*)`); err != nil {
-                       log.Println(err)
-               } else if match := pat.FindStringSubmatch(auth[0]); match != nil {
+               if match := authRe.FindStringSubmatch(auth[0]); match != nil {
                        return match[1]
                }
        }
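
Hoisting the patterns into package-level regexp.MustCompile calls (validLocatorRe, authRe) moves both the compile cost and the error check to program startup: a bad pattern now fails fast instead of being recompiled and re-checked on every request. A hypothetical demo, assuming it compiles alongside handlers.go:

    func demoTokenParsing() {
        req, _ := http.NewRequest("GET", "/acbd18db4cc2f85cedef654fccc4a4d8", nil)
        req.Header.Set("Authorization", "OAuth2 exampletoken")
        log.Println(GetApiToken(req))                                   // "exampletoken"
        log.Println(IsValidLocator("acbd18db4cc2f85cedef654fccc4a4d8")) // true
    }
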
index 83974ffc2c753eb78b2e1912ea83768a28e0fdb4..3dfdce20e321bfe61ffb9b9f119e10f189ee2c48 100644 (file)
@@ -1,7 +1,9 @@
 package main
 
 import (
+       "bufio"
        "bytes"
+       "errors"
        "flag"
        "fmt"
        "git.curoverse.com/arvados.git/sdk/go/keepclient"
@@ -37,22 +39,25 @@ var PROC_MOUNTS = "/proc/mounts"
 
 // enforce_permissions controls whether permission signatures
 // should be enforced (affecting GET and DELETE requests).
-// Initialized by the --enforce-permissions flag.
+// Initialized by the -enforce-permissions flag.
 var enforce_permissions bool
 
-// permission_ttl is the time duration for which new permission
+// blob_signature_ttl is the time duration for which new permission
 // signatures (returned by PUT requests) will be valid.
-// Initialized by the --permission-ttl flag.
-var permission_ttl time.Duration
+// Initialized by the -blob-signature-ttl flag (the deprecated
+// -permission-ttl flag remains as a synonym).
+var blob_signature_ttl time.Duration
 
 // data_manager_token represents the API token used by the
 // Data Manager, and is required on certain privileged operations.
-// Initialized by the --data-manager-token-file flag.
+// Initialized by the -data-manager-token-file flag.
 var data_manager_token string
 
 // never_delete can be used to prevent the DELETE handler from
 // actually deleting anything.
-var never_delete = false
+var never_delete = true
+
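+// maxBuffers is the maximum number of block-sized data buffers the
+// process may hold at once; adjustable with the -max-buffers flag.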
+var maxBuffers = 128
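+
+// bufs is the process-wide buffer pool, created in main() once
+// -max-buffers is parsed (tests create their own in init()).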
+var bufs *bufferPool
 
 // ==========
 // Error types.
@@ -73,7 +78,8 @@ var (
        NotFoundError       = &KeepError{404, "Not Found"}
        GenericError        = &KeepError{500, "Fail"}
        FullError           = &KeepError{503, "Full"}
-       TooLongError        = &KeepError{504, "Timeout"}
+       SizeRequiredError   = &KeepError{411, "Missing Content-Length"}
+       TooLongError        = &KeepError{413, "Block is too large"}
        MethodDisabledError = &KeepError{405, "Method disabled"}
 )
 
@@ -103,40 +109,108 @@ var KeepVM VolumeManager
 var pullq *WorkQueue
 var trashq *WorkQueue
 
+var (
+       flagSerializeIO bool
+       flagReadonly    bool
+)
+
+type volumeSet []Volume
+
+func (vs *volumeSet) Set(value string) error {
+       if dirs := strings.Split(value, ","); len(dirs) > 1 {
+               log.Print("DEPRECATED: using comma-separated volume list.")
+               for _, dir := range dirs {
+                       if err := vs.Set(dir); err != nil {
+                               return err
+                       }
+               }
+               return nil
+       }
+       if len(value) == 0 || value[0] != '/' {
+               return errors.New("Invalid volume: must begin with '/'.")
+       }
+       if _, err := os.Stat(value); err != nil {
+               return err
+       }
+       *vs = append(*vs, &UnixVolume{
+               root:      value,
+               serialize: flagSerializeIO,
+               readonly:  flagReadonly,
+       })
+       return nil
+}
+
+func (vs *volumeSet) String() string {
+       s := "["
+       for i, v := range *vs {
+               if i > 0 {
+                       s = s + " "
+               }
+               s = s + v.String()
+       }
+       return s + "]"
+}
+
+// Discover adds a volume for every directory named "keep" that is
+// located at the top level of a device- or tmpfs-backed mount point
+// other than "/". It returns the number of volumes added.
+func (vs *volumeSet) Discover() int {
+       added := 0
+       f, err := os.Open(PROC_MOUNTS)
+       if err != nil {
+               log.Fatalf("opening %s: %s", PROC_MOUNTS, err)
+       }
+       scanner := bufio.NewScanner(f)
+       for scanner.Scan() {
+               args := strings.Fields(scanner.Text())
+               if err := scanner.Err(); err != nil {
+                       log.Fatalf("reading %s: %s", PROC_MOUNTS, err)
+               }
+               if len(args) < 4 {
+                       // Skip malformed entries; /proc/mounts lines
+                       // normally have at least four fields (the mount
+                       // options in args[3] are parsed below).
+                       continue
+               }
+               dev, mount := args[0], args[1]
+               if mount == "/" {
+                       continue
+               }
+               if dev != "tmpfs" && !strings.HasPrefix(dev, "/dev/") {
+                       continue
+               }
+               keepdir := mount + "/keep"
+               if st, err := os.Stat(keepdir); err != nil || !st.IsDir() {
+                       continue
+               }
+               // Set the -readonly flag (but only for this volume)
+               // if the filesystem is mounted readonly.
+               flagReadonlyWas := flagReadonly
+               for _, fsopt := range strings.Split(args[3], ",") {
+                       if fsopt == "ro" {
+                               flagReadonly = true
+                               break
+                       }
+                       if fsopt == "rw" {
+                               break
+                       }
+               }
+               vs.Set(keepdir)
+               flagReadonly = flagReadonlyWas
+               added++
+       }
+       return added
+}
+
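
The Set and String methods above make volumeSet satisfy the flag.Value interface, which is how main() below registers the same accumulator under both -volume and -volumes. Usage sketch:

    var volumes volumeSet
    flag.Var(&volumes, "volume", "storage directory (repeatable)")
    flag.Parse()
    // "keepstore -volume /mnt/keep1 -volume /mnt/keep2" appends one
    // UnixVolume per occurrence; Set rejects paths that do not begin
    // with "/" or do not exist.
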
 // TODO(twp): continue moving as much code as possible out of main
 // so it can be effectively tested. Esp. handling and postprocessing
 // of command line flags (identifying Keep volumes and initializing
 // permission arguments).
 
 func main() {
-       log.Println("Keep started: pid", os.Getpid())
-
-       // Parse command-line flags:
-       //
-       // -listen=ipaddr:port
-       //    Interface on which to listen for requests. Use :port without
-       //    an ipaddr to listen on all network interfaces.
-       //    Examples:
-       //      -listen=127.0.0.1:4949
-       //      -listen=10.0.1.24:8000
-       //      -listen=:25107 (to listen to port 25107 on all interfaces)
-       //
-       // -volumes
-       //    A comma-separated list of directories to use as Keep volumes.
-       //    Example:
-       //      -volumes=/var/keep01,/var/keep02,/var/keep03/subdir
-       //
-       //    If -volumes is empty or is not present, Keep will select volumes
-       //    by looking at currently mounted filesystems for /keep top-level
-       //    directories.
+       log.Println("keepstore starting, pid", os.Getpid())
+       defer log.Println("keepstore exiting, pid", os.Getpid())
 
        var (
                data_manager_token_file string
                listen                  string
-               permission_key_file     string
+               blob_signing_key_file   string
                permission_ttl_sec      int
-               serialize_io            bool
-               volumearg               string
+               volumes                 volumeSet
                pidfile                 string
        )
        flag.StringVar(
@@ -154,75 +228,108 @@ func main() {
                &listen,
                "listen",
                DEFAULT_ADDR,
-               "Interface on which to listen for requests, in the format "+
-                       "ipaddr:port. e.g. -listen=10.0.1.24:8000. Use -listen=:port "+
-                       "to listen on all network interfaces.")
+               "Listening address, in the form \"host:port\". e.g., 10.0.1.24:8000. Omit the host part to listen on all interfaces.")
        flag.BoolVar(
                &never_delete,
                "never-delete",
-               false,
+               true,
                "If set, nothing will be deleted. HTTP 405 will be returned "+
                        "for valid DELETE requests.")
        flag.StringVar(
-               &permission_key_file,
+               &blob_signing_key_file,
                "permission-key-file",
                "",
+               "Synonym for -blob-signing-key-file.")
+       flag.StringVar(
+               &blob_signing_key_file,
+               "blob-signing-key-file",
+               "",
                "File containing the secret key for generating and verifying "+
-                       "permission signatures.")
+                       "blob permission signatures.")
        flag.IntVar(
                &permission_ttl_sec,
                "permission-ttl",
-               1209600,
-               "Expiration time (in seconds) for newly generated permission "+
-                       "signatures.")
+               0,
+               "Synonym for -blob-signature-ttl.")
+       flag.IntVar(
+               &permission_ttl_sec,
+               "blob-signature-ttl",
+               int((2 * 7 * 24 * time.Hour).Seconds()),
+               "Lifetime of blob permission signatures. "+
+                       "See services/api/config/application.default.yml.")
        flag.BoolVar(
-               &serialize_io,
+               &flagSerializeIO,
                "serialize",
                false,
-               "If set, all read and write operations on local Keep volumes will "+
-                       "be serialized.")
-       flag.StringVar(
-               &volumearg,
+               "Serialize read and write operations on the following volumes.")
+       flag.BoolVar(
+               &flagReadonly,
+               "readonly",
+               false,
+               "Do not write, delete, or touch anything on the following volumes.")
+       flag.Var(
+               &volumes,
                "volumes",
-               "",
-               "Comma-separated list of directories to use for Keep volumes, "+
-                       "e.g. -volumes=/var/keep1,/var/keep2. If empty or not "+
-                       "supplied, Keep will scan mounted filesystems for volumes "+
-                       "with a /keep top-level directory.")
-
+               "Deprecated synonym for -volume.")
+       flag.Var(
+               &volumes,
+               "volume",
+               "Local storage directory. Can be given more than once to add multiple directories. If none are supplied, the default is to use all directories named \"keep\" that exist in the top level directory of a mount point at startup time. Can be a comma-separated list, but this is deprecated: use multiple -volume arguments instead.")
        flag.StringVar(
                &pidfile,
                "pid",
                "",
-               "Path to write pid file")
+               "Path to write pid file during startup. This file is kept open and locked with LOCK_EX until keepstore exits, so `fuser -k pidfile` is one way to shut down. Exit immediately if there is an error opening, locking, or writing the pid file.")
+       flag.IntVar(
+               &maxBuffers,
+               "max-buffers",
+               maxBuffers,
+               fmt.Sprintf("Maximum RAM to use for data buffers, given in multiples of block size (%d MiB). When this limit is reached, HTTP requests requiring buffers (like GET and PUT) will wait for buffer space to be released.", BLOCKSIZE>>20))
 
        flag.Parse()
 
-       // Look for local keep volumes.
-       var keepvols []string
-       if volumearg == "" {
-               // TODO(twp): decide whether this is desirable default behavior.
-               // In production we may want to require the admin to specify
-               // Keep volumes explicitly.
-               keepvols = FindKeepVolumes()
-       } else {
-               keepvols = strings.Split(volumearg, ",")
+       if !never_delete {
+               log.Fatal("never_delete must be true, see #6221")
        }
 
-       // Check that the specified volumes actually exist.
-       var goodvols []Volume = nil
-       for _, v := range keepvols {
-               if _, err := os.Stat(v); err == nil {
-                       log.Println("adding Keep volume:", v)
-                       newvol := MakeUnixVolume(v, serialize_io)
-                       goodvols = append(goodvols, &newvol)
-               } else {
-                       log.Printf("bad Keep volume: %s\n", err)
+       if maxBuffers <= 0 {
+               log.Fatal("-max-buffers must be greater than zero.")
+       }
+       bufs = newBufferPool(maxBuffers, BLOCKSIZE)
+
+       if pidfile != "" {
+               f, err := os.OpenFile(pidfile, os.O_RDWR|os.O_CREATE, 0777)
+               if err != nil {
+                       log.Fatalf("open pidfile (%s): %s", pidfile, err)
+               }
+               err = syscall.Flock(int(f.Fd()), syscall.LOCK_EX|syscall.LOCK_NB)
+               if err != nil {
+                       log.Fatalf("flock pidfile (%s): %s", pidfile, err)
+               }
+               err = f.Truncate(0)
+               if err != nil {
+                       log.Fatalf("truncate pidfile (%s): %s", pidfile, err)
+               }
+               _, err = fmt.Fprint(f, os.Getpid())
+               if err != nil {
+                       log.Fatalf("write pidfile (%s): %s", pidfile, err)
+               }
+               err = f.Sync()
+               if err != nil {
+                       log.Fatalf("sync pidfile (%s): %s", pidfile, err)
+               }
+               defer f.Close()
+               defer os.Remove(pidfile)
+       }
+
+       if len(volumes) == 0 {
+               if volumes.Discover() == 0 {
+                       log.Fatal("No volumes found.")
                }
        }
 
-       if len(goodvols) == 0 {
-               log.Fatal("could not find any keep volumes")
+       for _, v := range volumes {
+               log.Printf("Using volume %v (writable=%v)", v, v.Writable())
        }
 
        // Initialize data manager token and permission key.
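
The -max-buffers cap above is enforced by newBufferPool, whose body is not part of this diff. A minimal sketch of the idea, assuming a channel-based semaphore with lazy allocation; the details are illustrative, not the committed code:

// Sketch only: Get blocks once maxBuffers buffers are outstanding;
// Put releases a slot. The committed newBufferPool may differ.
type bufferPool struct {
	sem  chan struct{} // one token per buffer currently handed out
	size int           // BLOCKSIZE
}

func newBufferPool(maxBuffers, size int) *bufferPool {
	return &bufferPool{sem: make(chan struct{}, maxBuffers), size: size}
}

func (p *bufferPool) Get() []byte {
	p.sem <- struct{}{} // blocks while all maxBuffers slots are in use
	return make([]byte, p.size)
}

func (p *bufferPool) Put(buf []byte) {
	<-p.sem // free a slot, unblocking one waiting Get
}

This is the behavior promised in the flag help: GET and PUT handlers wait in Get until another request returns its buffer.
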
@@ -235,33 +342,30 @@ func main() {
                        log.Fatalf("reading data manager token: %s\n", err)
                }
        }
-       if permission_key_file != "" {
-               if buf, err := ioutil.ReadFile(permission_key_file); err == nil {
+       if blob_signing_key_file != "" {
+               if buf, err := ioutil.ReadFile(blob_signing_key_file); err == nil {
                        PermissionSecret = bytes.TrimSpace(buf)
                } else {
                        log.Fatalf("reading permission key: %s\n", err)
                }
        }
 
-       // Initialize permission TTL
-       permission_ttl = time.Duration(permission_ttl_sec) * time.Second
+       blob_signature_ttl = time.Duration(permission_ttl_sec) * time.Second
 
-       // If --enforce-permissions is true, we must have a permission key
-       // to continue.
        if PermissionSecret == nil {
                if enforce_permissions {
-                       log.Fatal("--enforce-permissions requires a permission key")
+                       log.Fatal("-enforce-permissions requires a permission key")
                } else {
                        log.Println("Running without a PermissionSecret. Block locators " +
                                "returned by this server will not be signed, and will be rejected " +
                                "by a server that enforces permissions.")
-                       log.Println("To fix this, run Keep with --permission-key-file=<path> " +
-                               "to define the location of a file containing the permission key.")
+                       log.Println("To fix this, use the -blob-signing-key-file flag " +
+                               "to specify the file containing the permission key.")
                }
        }
 
        // Start a round-robin VolumeManager with the volumes we have found.
-       KeepVM = MakeRRVolumeManager(goodvols)
+       KeepVM = MakeRRVolumeManager(volumes)
 
        // Tell the built-in HTTP server to direct all requests to the REST router.
        loggingRouter := MakeLoggingRESTRouter()
@@ -276,16 +380,21 @@ func main() {
        }
 
        // Initialize Pull queue and worker
-       keepClient := keepclient.KeepClient{
+       keepClient := &keepclient.KeepClient{
                Arvados:       nil,
                Want_replicas: 1,
                Using_proxy:   true,
                Client:        &http.Client{},
        }
 
+       // Initialize the pullq and worker
        pullq = NewWorkQueue()
        go RunPullWorker(pullq, keepClient)
 
+       // Initialize the trashq and worker
+       trashq = NewWorkQueue()
+       go RunTrashWorker(trashq)
+
        // Shut down the server gracefully (by closing the listener)
        // if SIGTERM is received.
        term := make(chan os.Signal, 1)
@@ -295,24 +404,9 @@ func main() {
                listener.Close()
        }(term)
        signal.Notify(term, syscall.SIGTERM)
+       signal.Notify(term, syscall.SIGINT)
 
-       if pidfile != "" {
-               f, err := os.Create(pidfile)
-               if err == nil {
-                       fmt.Fprint(f, os.Getpid())
-                       f.Close()
-               } else {
-                       log.Printf("Error writing pid file (%s): %s", pidfile, err.Error())
-               }
-       }
-
-       // Start listening for requests.
+       log.Println("listening at", listen)
        srv := &http.Server{Addr: listen}
        srv.Serve(listener)
-
-       log.Println("shutting down")
-
-       if pidfile != "" {
-               os.Remove(pidfile)
-       }
 }
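
flag.Var(&volumes, ...) above requires volumeSet to implement flag.Value, and startup falls back to volumes.Discover() when no -volume arguments were given. Neither method appears in this diff; the following is a hedged sketch, grounded in the flag help above and in the TestDiscoverTmpfs/TestDiscoverNone fixtures below (which exercise PROC_MOUNTS parsing and the root/readonly fields of UnixVolume). Assumes bufio, fmt, os, and strings are imported; error handling and -serialize propagation are omitted.

// Sketch of the flag.Value side of volumeSet.
type volumeSet []Volume

func (vs *volumeSet) String() string {
	return fmt.Sprintf("%+v", []Volume(*vs))
}

// Set is called once per -volume argument; the deprecated -volumes
// form may carry a comma-separated list.
func (vs *volumeSet) Set(value string) error {
	for _, dir := range strings.Split(value, ",") {
		if fi, err := os.Stat(dir); err != nil {
			return err
		} else if !fi.IsDir() {
			return fmt.Errorf("%s: not a directory", dir)
		}
		*vs = append(*vs, &UnixVolume{root: dir, readonly: flagReadonly})
	}
	return nil
}

// Discover adds a volume for each mount point that has a top-level
// "keep" directory, marking it readonly when the mount options
// include "ro" (see TestDiscoverTmpfs below).
func (vs *volumeSet) Discover() int {
	added := 0
	f, err := os.Open(PROC_MOUNTS)
	if err != nil {
		return 0
	}
	defer f.Close()
	scanner := bufio.NewScanner(f)
	for scanner.Scan() {
		args := strings.Fields(scanner.Text())
		if len(args) < 4 {
			continue
		}
		keepdir := args[1] + "/keep"
		if fi, err := os.Stat(keepdir); err != nil || !fi.IsDir() {
			continue
		}
		readonly := false
		for _, opt := range strings.Split(args[3], ",") {
			if opt == "ro" {
				readonly = true
			}
		}
		*vs = append(*vs, &UnixVolume{root: keepdir, readonly: readonly})
		added++
	}
	return added
}
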
index 686f502500cec1e6e7ad3508f2978ef55522a6dc..e01b01363d4e2de2f77b854ea5789fb996276234 100644 (file)
@@ -52,9 +52,9 @@ func TestGetBlock(t *testing.T) {
 
        // Prepare two test Keep volumes. Our block is stored on the second volume.
        KeepVM = MakeTestVolumeManager(2)
-       defer func() { KeepVM.Quit() }()
+       defer KeepVM.Close()
 
-       vols := KeepVM.Volumes()
+       vols := KeepVM.AllReadable()
        if err := vols[1].Put(TEST_HASH, TEST_BLOCK); err != nil {
                t.Error(err)
        }
@@ -77,7 +77,7 @@ func TestGetBlockMissing(t *testing.T) {
 
        // Create two empty test Keep volumes.
        KeepVM = MakeTestVolumeManager(2)
-       defer func() { KeepVM.Quit() }()
+       defer KeepVM.Close()
 
        // Check that GetBlock returns failure.
        result, err := GetBlock(TEST_HASH, false)
@@ -95,9 +95,9 @@ func TestGetBlockCorrupt(t *testing.T) {
 
        // Create two test Keep volumes and store a corrupt block in one.
        KeepVM = MakeTestVolumeManager(2)
-       defer func() { KeepVM.Quit() }()
+       defer KeepVM.Close()
 
-       vols := KeepVM.Volumes()
+       vols := KeepVM.AllReadable()
        vols[0].Put(TEST_HASH, BAD_BLOCK)
 
        // Check that GetBlock returns failure.
@@ -119,15 +119,15 @@ func TestPutBlockOK(t *testing.T) {
 
        // Create two test Keep volumes.
        KeepVM = MakeTestVolumeManager(2)
-       defer func() { KeepVM.Quit() }()
+       defer KeepVM.Close()
 
        // Check that PutBlock stores the data as expected.
        if err := PutBlock(TEST_BLOCK, TEST_HASH); err != nil {
                t.Fatalf("PutBlock: %v", err)
        }
 
-       vols := KeepVM.Volumes()
-       result, err := vols[0].Get(TEST_HASH)
+       vols := KeepVM.AllReadable()
+       result, err := vols[1].Get(TEST_HASH)
        if err != nil {
                t.Fatalf("Volume #0 Get returned error: %v", err)
        }
@@ -146,9 +146,9 @@ func TestPutBlockOneVol(t *testing.T) {
 
        // Create two test Keep volumes, but cripple one of them.
        KeepVM = MakeTestVolumeManager(2)
-       defer func() { KeepVM.Quit() }()
+       defer KeepVM.Close()
 
-       vols := KeepVM.Volumes()
+       vols := KeepVM.AllWritable()
        vols[0].(*MockVolume).Bad = true
 
        // Check that PutBlock stores the data as expected.
@@ -176,7 +176,7 @@ func TestPutBlockMD5Fail(t *testing.T) {
 
        // Create two test Keep volumes.
        KeepVM = MakeTestVolumeManager(2)
-       defer func() { KeepVM.Quit() }()
+       defer KeepVM.Close()
 
        // Check that PutBlock returns the expected error when the hash does
        // not match the block.
@@ -200,10 +200,10 @@ func TestPutBlockCorrupt(t *testing.T) {
 
        // Create two test Keep volumes.
        KeepVM = MakeTestVolumeManager(2)
-       defer func() { KeepVM.Quit() }()
+       defer KeepVM.Close()
 
        // Store a corrupted block under TEST_HASH.
-       vols := KeepVM.Volumes()
+       vols := KeepVM.AllWritable()
        vols[0].Put(TEST_HASH, BAD_BLOCK)
        if err := PutBlock(TEST_BLOCK, TEST_HASH); err != nil {
                t.Errorf("PutBlock: %v", err)
@@ -231,7 +231,7 @@ func TestPutBlockCollision(t *testing.T) {
 
        // Prepare two test Keep volumes.
        KeepVM = MakeTestVolumeManager(2)
-       defer func() { KeepVM.Quit() }()
+       defer KeepVM.Close()
 
        // Store one block, then attempt to store the other. Confirm that
        // PutBlock reported a CollisionError.
@@ -254,8 +254,8 @@ func TestPutBlockTouchFails(t *testing.T) {
 
        // Prepare two test Keep volumes.
        KeepVM = MakeTestVolumeManager(2)
-       defer func() { KeepVM.Quit() }()
-       vols := KeepVM.Volumes()
+       defer KeepVM.Close()
+       vols := KeepVM.AllWritable()
 
        // Store a block and then make the underlying volume bad,
        // so a subsequent attempt to update the file timestamp
@@ -293,29 +293,16 @@ func TestPutBlockTouchFails(t *testing.T) {
        }
 }
 
-// ========================================
-// FindKeepVolumes tests.
-// ========================================
-
-// TestFindKeepVolumes
-//     Confirms that FindKeepVolumes finds tmpfs volumes with "/keep"
-//     directories at the top level.
-//
-func TestFindKeepVolumes(t *testing.T) {
-       var tempVols [2]string
+func TestDiscoverTmpfs(t *testing.T) {
+       var tempVols [4]string
        var err error
 
-       defer func() {
-               for _, path := range tempVols {
-                       os.RemoveAll(path)
-               }
-       }()
-
-       // Create two directories suitable for using as keep volumes.
+       // Create some directories suitable for using as keep volumes.
        for i := range tempVols {
                if tempVols[i], err = ioutil.TempDir("", "findvol"); err != nil {
                        t.Fatal(err)
                }
+               defer os.RemoveAll(tempVols[i])
                tempVols[i] = tempVols[i] + "/keep"
                if err = os.Mkdir(tempVols[i], 0755); err != nil {
                        t.Fatal(err)
@@ -323,53 +310,69 @@ func TestFindKeepVolumes(t *testing.T) {
        }
 
        // Set up a bogus PROC_MOUNTS file.
-       if f, err := ioutil.TempFile("", "keeptest"); err == nil {
-               for _, vol := range tempVols {
-                       fmt.Fprintf(f, "tmpfs %s tmpfs opts\n", path.Dir(vol))
+       f, err := ioutil.TempFile("", "keeptest")
+       if err != nil {
+               t.Fatal(err)
+       }
+       defer os.Remove(f.Name())
+       for i, vol := range tempVols {
+               // Add readonly mount points at odd indexes.
+               var opts string
+               switch i % 2 {
+               case 0:
+                       opts = "rw,nosuid,nodev,noexec"
+               case 1:
+                       opts = "nosuid,nodev,noexec,ro"
                }
-               f.Close()
-               PROC_MOUNTS = f.Name()
-
-               // Check that FindKeepVolumes finds the temp volumes.
-               resultVols := FindKeepVolumes()
-               if len(tempVols) != len(resultVols) {
-                       t.Fatalf("set up %d volumes, FindKeepVolumes found %d\n",
-                               len(tempVols), len(resultVols))
+               fmt.Fprintf(f, "tmpfs %s tmpfs %s 0 0\n", path.Dir(vol), opts)
+       }
+       f.Close()
+       PROC_MOUNTS = f.Name()
+
+       var resultVols volumeSet
+       added := resultVols.Discover()
+
+       if added != len(resultVols) {
+               t.Errorf("Discover returned %d, but added %d volumes",
+                       added, len(resultVols))
+       }
+       if added != len(tempVols) {
+               t.Errorf("Discover returned %d but we set up %d volumes",
+                       added, len(tempVols))
+       }
+       for i, tmpdir := range tempVols {
+               if tmpdir != resultVols[i].(*UnixVolume).root {
+                       t.Errorf("Discover returned %s, expected %s\n",
+                               resultVols[i].(*UnixVolume).root, tmpdir)
                }
-               for i := range tempVols {
-                       if tempVols[i] != resultVols[i] {
-                               t.Errorf("FindKeepVolumes returned %s, expected %s\n",
-                                       resultVols[i], tempVols[i])
-                       }
+               if expectReadonly := i%2 == 1; expectReadonly != resultVols[i].(*UnixVolume).readonly {
+                       t.Errorf("Discover added %s with readonly=%v, should be %v",
+                               tmpdir, !expectReadonly, expectReadonly)
                }
-
-               os.Remove(f.Name())
        }
 }
 
-// TestFindKeepVolumesFail
-//     When no Keep volumes are present, FindKeepVolumes returns an empty slice.
-//
-func TestFindKeepVolumesFail(t *testing.T) {
+func TestDiscoverNone(t *testing.T) {
        defer teardown()
 
        // Set up a bogus PROC_MOUNTS file with no Keep vols.
-       if f, err := ioutil.TempFile("", "keeptest"); err == nil {
-               fmt.Fprintln(f, "rootfs / rootfs opts 0 0")
-               fmt.Fprintln(f, "sysfs /sys sysfs opts 0 0")
-               fmt.Fprintln(f, "proc /proc proc opts 0 0")
-               fmt.Fprintln(f, "udev /dev devtmpfs opts 0 0")
-               fmt.Fprintln(f, "devpts /dev/pts devpts opts 0 0")
-               f.Close()
-               PROC_MOUNTS = f.Name()
-
-               // Check that FindKeepVolumes returns an empty array.
-               resultVols := FindKeepVolumes()
-               if len(resultVols) != 0 {
-                       t.Fatalf("FindKeepVolumes returned %v", resultVols)
-               }
-
-               os.Remove(PROC_MOUNTS)
+       f, err := ioutil.TempFile("", "keeptest")
+       if err != nil {
+               t.Fatal(err)
+       }
+       defer os.Remove(f.Name())
+       fmt.Fprintln(f, "rootfs / rootfs opts 0 0")
+       fmt.Fprintln(f, "sysfs /sys sysfs opts 0 0")
+       fmt.Fprintln(f, "proc /proc proc opts 0 0")
+       fmt.Fprintln(f, "udev /dev devtmpfs opts 0 0")
+       fmt.Fprintln(f, "devpts /dev/pts devpts opts 0 0")
+       f.Close()
+       PROC_MOUNTS = f.Name()
+
+       var resultVols volumeSet
+       added := resultVols.Discover()
+       if added != 0 || len(resultVols) != 0 {
+               t.Fatalf("got %d, %v; expected 0, []", added, resultVols)
        }
 }
 
@@ -382,17 +385,19 @@ func TestIndex(t *testing.T) {
        // Include multiple blocks on different volumes, and
        // some metadata files.
        KeepVM = MakeTestVolumeManager(2)
-       defer func() { KeepVM.Quit() }()
+       defer KeepVM.Close()
 
-       vols := KeepVM.Volumes()
+       vols := KeepVM.AllReadable()
        vols[0].Put(TEST_HASH, TEST_BLOCK)
        vols[1].Put(TEST_HASH_2, TEST_BLOCK_2)
        vols[0].Put(TEST_HASH_3, TEST_BLOCK_3)
        vols[0].Put(TEST_HASH+".meta", []byte("metadata"))
        vols[1].Put(TEST_HASH_2+".meta", []byte("metadata"))
 
-       index := vols[0].Index("") + vols[1].Index("")
-       index_rows := strings.Split(index, "\n")
+       buf := new(bytes.Buffer)
+       vols[0].IndexTo("", buf)
+       vols[1].IndexTo("", buf)
+       index_rows := strings.Split(buf.String(), "\n")
        sort.Strings(index_rows)
        sorted_index := strings.Join(index_rows, "\n")
        expected := `^\n` + TEST_HASH + `\+\d+ \d+\n` +
@@ -402,58 +407,19 @@ func TestIndex(t *testing.T) {
        match, err := regexp.MatchString(expected, sorted_index)
        if err == nil {
                if !match {
-                       t.Errorf("IndexLocators returned:\n%s", index)
+                       t.Errorf("IndexLocators returned:\n%s", string(buf.Bytes()))
                }
        } else {
                t.Errorf("regexp.MatchString: %s", err)
        }
 }
 
-// TestNodeStatus
-//     Test that GetNodeStatus returns valid info about available volumes.
-//
-//     TODO(twp): set up appropriate interfaces to permit more rigorous
-//     testing.
-//
-func TestNodeStatus(t *testing.T) {
-       defer teardown()
-
-       // Set up test Keep volumes with some blocks.
-       KeepVM = MakeTestVolumeManager(2)
-       defer func() { KeepVM.Quit() }()
-
-       vols := KeepVM.Volumes()
-       vols[0].Put(TEST_HASH, TEST_BLOCK)
-       vols[1].Put(TEST_HASH_2, TEST_BLOCK_2)
-
-       // Get node status and make a basic sanity check.
-       st := GetNodeStatus()
-       for i := range vols {
-               volinfo := st.Volumes[i]
-               mtp := volinfo.MountPoint
-               if mtp != "/bogo" {
-                       t.Errorf("GetNodeStatus mount_point %s, expected /bogo", mtp)
-               }
-               if volinfo.DeviceNum == 0 {
-                       t.Errorf("uninitialized device_num in %v", volinfo)
-               }
-               if volinfo.BytesFree == 0 {
-                       t.Errorf("uninitialized bytes_free in %v", volinfo)
-               }
-               if volinfo.BytesUsed == 0 {
-                       t.Errorf("uninitialized bytes_used in %v", volinfo)
-               }
-       }
-}
-
 // ========================================
 // Helper functions for unit tests.
 // ========================================
 
-// MakeTestVolumeManager
-//     Creates and returns a RRVolumeManager with the specified number
-//     of MockVolumes.
-//
+// MakeTestVolumeManager returns a RRVolumeManager with the specified
+// number of MockVolumes.
 func MakeTestVolumeManager(num_volumes int) VolumeManager {
        vols := make([]Volume, num_volumes)
        for i := range vols {
@@ -462,9 +428,7 @@ func MakeTestVolumeManager(num_volumes int) VolumeManager {
        return MakeRRVolumeManager(vols)
 }
 
-// teardown
-//     Cleanup to perform after each test.
-//
+// teardown cleans up after each test.
 func teardown() {
        data_manager_token = ""
        enforce_permissions = false
index e30df876322ab9c09acdbab110f851ea0a954e20..b622d1d3eefd2eb3bedfb5e4260976cb2075f85a 100644 (file)
@@ -8,6 +8,7 @@ import (
        "log"
        "net/http"
        "strings"
+       "time"
 )
 
 type LoggingResponseWriter struct {
@@ -40,12 +41,13 @@ func MakeLoggingRESTRouter() *LoggingRESTRouter {
 }
 
 func (loggingRouter *LoggingRESTRouter) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
-       loggingWriter := LoggingResponseWriter{200, 0, resp, ""}
+       t0 := time.Now()
+       loggingWriter := LoggingResponseWriter{http.StatusOK, 0, resp, ""}
        loggingRouter.router.ServeHTTP(&loggingWriter, req)
-       statusText := "OK"
+       statusText := http.StatusText(loggingWriter.Status)
        if loggingWriter.Status >= 400 {
                statusText = strings.Replace(loggingWriter.ResponseBody, "\n", "", -1)
        }
-       log.Printf("[%s] %s %s %d %d \"%s\"", req.RemoteAddr, req.Method, req.URL.Path[1:], loggingWriter.Status, loggingWriter.Length, statusText)
+       log.Printf("[%s] %s %s %.6fs %d %d \"%s\"", req.RemoteAddr, req.Method, req.URL.Path[1:], time.Since(t0).Seconds(), loggingWriter.Status, loggingWriter.Length, statusText)
 
 }
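
The new log line adds the request duration and replaces the hard-coded "OK" with http.StatusText. It relies on LoggingResponseWriter recording the status, byte count, and (for errors) the response body as the inner router writes; those methods are outside this hunk. A minimal sketch, inferred from the struct literal LoggingResponseWriter{http.StatusOK, 0, resp, ""}; the embedded-field name is an assumption:

// Sketch of the capture methods implied by the fields used above.
func (w *LoggingResponseWriter) WriteHeader(code int) {
	w.Status = code
	w.ResponseWriter.WriteHeader(code)
}

func (w *LoggingResponseWriter) Write(data []byte) (int, error) {
	w.Length += len(data)
	if w.Status >= 400 {
		// Keep the error body so the log line can quote it.
		w.ResponseBody += string(data)
	}
	return w.ResponseWriter.Write(data)
}
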
index 1048f53130315525da0a80c1214207a9eb101cd5..65160b1868913638e8315a266e0b3736ecfbe14c 100644 (file)
@@ -82,22 +82,29 @@ func SignLocator(blob_locator string, api_token string, expiry time.Time) string
 
 var signedLocatorRe = regexp.MustCompile(`^([[:xdigit:]]{32}).*\+A([[:xdigit:]]{40})@([[:xdigit:]]{8})`)
 
-// VerifySignature returns true if the signature on the signed_locator
-// can be verified using the given api_token.
-func VerifySignature(signed_locator string, api_token string) bool {
+// VerifySignature returns nil if the signature on the signed_locator
+// can be verified using the given api_token. Otherwise it returns
+// either ExpiredError (if the timestamp has expired, which is
+// something the client could have figured out independently) or
+// PermissionError.
+func VerifySignature(signed_locator string, api_token string) error {
        matches := signedLocatorRe.FindStringSubmatch(signed_locator)
        if matches == nil {
                // Could not find a permission signature at all
-               return false
+               return PermissionError
        }
        blob_hash := matches[1]
        sig_hex := matches[2]
        exp_hex := matches[3]
-       if exp_time, err := ParseHexTimestamp(exp_hex); err != nil || exp_time.Before(time.Now()) {
-               // Signature is expired, or timestamp is unparseable
-               return false
+       if exp_time, err := ParseHexTimestamp(exp_hex); err != nil {
+               return PermissionError
+       } else if exp_time.Before(time.Now()) {
+               return ExpiredError
        }
-       return sig_hex == MakePermSignature(blob_hash, api_token, exp_hex)
+       if sig_hex != MakePermSignature(blob_hash, api_token, exp_hex) {
+               return PermissionError
+       }
+       return nil
 }
 
 func ParseHexTimestamp(timestamp_hex string) (ts time.Time, err error) {
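
Returning typed errors instead of a bool lets callers distinguish an expired signature, which the client can cure by requesting a fresh locator, from a bad signature or token. A hypothetical handler-side sketch; the helper name and HTTP status choices are illustrative, not part of this commit:

// checkSignature shows how the new error values might map onto
// HTTP responses. Hypothetical helper, not from this commit.
func checkSignature(resp http.ResponseWriter, signedLocator, apiToken string) bool {
	switch err := VerifySignature(signedLocator, apiToken); err {
	case nil:
		return true // signature valid; proceed with the request
	case ExpiredError:
		http.Error(resp, "expired signature", http.StatusGone)
	default: // PermissionError, or any future error value
		http.Error(resp, "permission denied", http.StatusForbidden)
	}
	return false
}
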
index d0081cd01014b69abf9910ffad059077d19549ee..e43cb8dcd99bf39d4318153525b4f46c660239ce 100644 (file)
@@ -39,7 +39,7 @@ func TestVerifySignature(t *testing.T) {
        PermissionSecret = []byte(known_key)
        defer func() { PermissionSecret = nil }()
 
-       if !VerifySignature(known_signed_locator, known_token) {
+       if VerifySignature(known_signed_locator, known_token) != nil {
                t.Fail()
        }
 }
@@ -48,15 +48,15 @@ func TestVerifySignatureExtraHints(t *testing.T) {
        PermissionSecret = []byte(known_key)
        defer func() { PermissionSecret = nil }()
 
-       if !VerifySignature(known_locator + "+K@xyzzy" + known_sig_hint, known_token) {
+       if VerifySignature(known_locator+"+K@xyzzy"+known_sig_hint, known_token) != nil {
                t.Fatal("Verify cannot handle hint before permission signature")
        }
 
-       if !VerifySignature(known_locator + known_sig_hint + "+Zfoo", known_token) {
+       if VerifySignature(known_locator+known_sig_hint+"+Zfoo", known_token) != nil {
                t.Fatal("Verify cannot handle hint after permission signature")
        }
 
-       if !VerifySignature(known_locator + "+K@xyzzy" + known_sig_hint + "+Zfoo", known_token) {
+       if VerifySignature(known_locator+"+K@xyzzy"+known_sig_hint+"+Zfoo", known_token) != nil {
                t.Fatal("Verify cannot handle hints around permission signature")
        }
 }
@@ -66,11 +66,11 @@ func TestVerifySignatureWrongSize(t *testing.T) {
        PermissionSecret = []byte(known_key)
        defer func() { PermissionSecret = nil }()
 
-       if !VerifySignature(known_hash + "+999999" + known_sig_hint, known_token) {
+       if VerifySignature(known_hash+"+999999"+known_sig_hint, known_token) != nil {
                t.Fatal("Verify cannot handle incorrect size hint")
        }
 
-       if !VerifySignature(known_hash + known_sig_hint, known_token) {
+       if VerifySignature(known_hash+known_sig_hint, known_token) != nil {
                t.Fatal("Verify cannot handle missing size hint")
        }
 }
@@ -80,7 +80,7 @@ func TestVerifySignatureBadSig(t *testing.T) {
        defer func() { PermissionSecret = nil }()
 
        bad_locator := known_locator + "+Aaaaaaaaaaaaaaaa@" + known_timestamp
-       if VerifySignature(bad_locator, known_token) {
+       if VerifySignature(bad_locator, known_token) != PermissionError {
                t.Fail()
        }
 }
@@ -89,8 +89,8 @@ func TestVerifySignatureBadTimestamp(t *testing.T) {
        PermissionSecret = []byte(known_key)
        defer func() { PermissionSecret = nil }()
 
-       bad_locator := known_locator + "+A" + known_signature + "@00000000"
-       if VerifySignature(bad_locator, known_token) {
+       bad_locator := known_locator + "+A" + known_signature + "@OOOOOOOl"
+       if VerifySignature(bad_locator, known_token) != PermissionError {
                t.Fail()
        }
 }
@@ -99,7 +99,7 @@ func TestVerifySignatureBadSecret(t *testing.T) {
        PermissionSecret = []byte("00000000000000000000")
        defer func() { PermissionSecret = nil }()
 
-       if VerifySignature(known_signed_locator, known_token) {
+       if VerifySignature(known_signed_locator, known_token) != PermissionError {
                t.Fail()
        }
 }
@@ -108,7 +108,7 @@ func TestVerifySignatureBadToken(t *testing.T) {
        PermissionSecret = []byte(known_key)
        defer func() { PermissionSecret = nil }()
 
-       if VerifySignature(known_signed_locator, "00000000") {
+       if VerifySignature(known_signed_locator, "00000000") != PermissionError {
                t.Fail()
        }
 }
@@ -119,7 +119,7 @@ func TestVerifySignatureExpired(t *testing.T) {
 
        yesterday := time.Now().AddDate(0, 0, -1)
        expired_locator := SignLocator(known_hash, known_token, yesterday)
-       if VerifySignature(expired_locator, known_token) {
+       if VerifySignature(expired_locator, known_token) != ExpiredError {
                t.Fail()
        }
 }
index fac4bb15030eaaa8334bf375dc2a9baa4695fbb0..acf861119f47fd1b765bcad461d826c369151968 100644 (file)
@@ -19,11 +19,12 @@ import (
                        Skip the rest of the servers if no errors
                Repeat
 */
-func RunPullWorker(pullq *WorkQueue, keepClient keepclient.KeepClient) {
+func RunPullWorker(pullq *WorkQueue, keepClient *keepclient.KeepClient) {
        nextItem := pullq.NextItem
        for item := range nextItem {
                pullRequest := item.(PullRequest)
                err := PullItemAndProcess(item.(PullRequest), GenerateRandomApiToken(), keepClient)
+               pullq.DoneItem <- struct{}{}
                if err == nil {
                        log.Printf("Pull %s success", pullRequest)
                } else {
@@ -39,14 +40,14 @@ func RunPullWorker(pullq *WorkQueue, keepClient keepclient.KeepClient) {
                Using this token & signature, retrieve the given block.
                Write to storage
 */
-func PullItemAndProcess(pullRequest PullRequest, token string, keepClient keepclient.KeepClient) (err error) {
+func PullItemAndProcess(pullRequest PullRequest, token string, keepClient *keepclient.KeepClient) (err error) {
        keepClient.Arvados.ApiToken = token
 
        service_roots := make(map[string]string)
        for _, addr := range pullRequest.Servers {
                service_roots[addr] = addr
        }
-       keepClient.SetServiceRoots(service_roots)
+       keepClient.SetServiceRoots(service_roots, nil, nil)
 
        // Generate signature with a random token
        expires_at := time.Now().Add(60 * time.Second)
@@ -75,7 +76,7 @@ func PullItemAndProcess(pullRequest PullRequest, token string, keepClient keepcl
 }
 
 // Fetch the content for the given locator using keepclient.
-var GetContent = func(signedLocator string, keepClient keepclient.KeepClient) (
+var GetContent = func(signedLocator string, keepClient *keepclient.KeepClient) (
        reader io.ReadCloser, contentLength int64, url string, err error) {
        reader, blocklen, url, err := keepClient.Get(signedLocator)
        return reader, blocklen, url, err
index b293cf92ea87260dd487e5e9d190a85aca779708..3e57407369c0dccf8216e3e0835820c0bc419b55 100644 (file)
@@ -1,16 +1,19 @@
 package main
 
 import (
+       "bytes"
+       "errors"
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
        "git.curoverse.com/arvados.git/sdk/go/arvadostest"
        "git.curoverse.com/arvados.git/sdk/go/keepclient"
+       "io"
        "net/http"
        "os"
        "strings"
        "testing"
 )
 
-var keepClient keepclient.KeepClient
+var keepClient *keepclient.KeepClient
 
 type PullWorkIntegrationTestData struct {
        Name     string
@@ -33,7 +36,7 @@ func SetupPullWorkerIntegrationTest(t *testing.T, testData PullWorkIntegrationTe
        }
 
        // keep client
-       keepClient = keepclient.KeepClient{
+       keepClient = &keepclient.KeepClient{
                Arvados:       &arv,
                Want_replicas: 1,
                Using_proxy:   true,
@@ -42,17 +45,15 @@ func SetupPullWorkerIntegrationTest(t *testing.T, testData PullWorkIntegrationTe
 
        // discover keep services
        var servers []string
-       service_roots, err := keepClient.DiscoverKeepServers()
-       if err != nil {
+       if err := keepClient.DiscoverKeepServers(); err != nil {
                t.Error("Error discovering keep services")
        }
-       for _, host := range service_roots {
+       for _, host := range keepClient.LocalRoots() {
                servers = append(servers, host)
        }
 
        // Put content if the test needs it
        if wantData {
-               keepClient.SetServiceRoots(service_roots)
                locator, _, err := keepClient.PutB([]byte(testData.Content))
                if err != nil {
                        t.Errorf("Error putting test data in setup for %s %s %v", testData.Content, locator, err)
@@ -106,6 +107,7 @@ func TestPullWorkerIntegration_GetExistingLocator(t *testing.T) {
 func performPullWorkerIntegrationTest(testData PullWorkIntegrationTestData, pullRequest PullRequest, t *testing.T) {
 
        // Override PutContent to mock PutBlock functionality
+       defer func(orig func([]byte, string) error) { PutContent = orig }(PutContent)
        PutContent = func(content []byte, locator string) (err error) {
                if string(content) != testData.Content {
                        t.Errorf("PutContent invoked with unexpected data. Expected: %s; Found: %s", testData.Content, content)
@@ -113,16 +115,29 @@ func performPullWorkerIntegrationTest(testData PullWorkIntegrationTestData, pull
                return
        }
 
+       // Override GetContent to mock keepclient Get functionality
+       defer func(orig func(string, *keepclient.KeepClient) (io.ReadCloser, int64, string, error)) {
+               GetContent = orig
+       }(GetContent)
+       GetContent = func(signedLocator string, keepClient *keepclient.KeepClient) (
+               reader io.ReadCloser, contentLength int64, url string, err error) {
+               if testData.GetError != "" {
+                       return nil, 0, "", errors.New(testData.GetError)
+               }
+               rdr := &ClosingBuffer{bytes.NewBufferString(testData.Content)}
+               return rdr, int64(len(testData.Content)), "", nil
+       }
+
        keepClient.Arvados.ApiToken = GenerateRandomApiToken()
        err := PullItemAndProcess(pullRequest, keepClient.Arvados.ApiToken, keepClient)
 
        if len(testData.GetError) > 0 {
                if (err == nil) || (!strings.Contains(err.Error(), testData.GetError)) {
-                       t.Errorf("Got error %v", err)
+                       t.Errorf("Got error %v, expected %v", err, testData.GetError)
                }
        } else {
                if err != nil {
-                       t.Errorf("Got error %v", err)
+                       t.Errorf("Got error %v, expected nil", err)
                }
        }
 }
index f0e9e65f1ee1015a57c2bd87e8d9c926978f21c4..37d83b32802af1432bf7ed8f2af5826a3d757914 100644 (file)
@@ -9,6 +9,7 @@ import (
        "io"
        "net/http"
        "testing"
+       "time"
 )
 
 type PullWorkerTestSuite struct{}
@@ -22,7 +23,6 @@ func TestPullWorker(t *testing.T) {
 var _ = Suite(&PullWorkerTestSuite{})
 
 var testPullLists map[string]string
-var processedPullLists map[string]string
 var readContent string
 var readError error
 var putContent []byte
@@ -39,7 +39,6 @@ func (s *PullWorkerTestSuite) SetUpTest(c *C) {
        // This behavior is verified using these two maps in the
        // "TestPullWorker_pull_list_with_two_items_latest_replacing_old"
        testPullLists = make(map[string]string)
-       processedPullLists = make(map[string]string)
 }
 
 // Since keepstore does not come into picture in tests,
@@ -56,14 +55,13 @@ func RunTestPullWorker(c *C) {
 
 var first_pull_list = []byte(`[
                {
-                       "locator":"locator1",
+                       "locator":"acbd18db4cc2f85cedef654fccc4a4d8+3",
                        "servers":[
                                "server_1",
                                "server_2"
                        ]
-               },
-    {
-                       "locator":"locator2",
+               },{
+                       "locator":"37b51d194a7513e45b56f6524f2d51f2+3",
                        "servers":[
                                "server_3"
                        ]
@@ -72,10 +70,10 @@ var first_pull_list = []byte(`[
 
 var second_pull_list = []byte(`[
                {
-                       "locator":"locator3",
+                       "locator":"73feffa4b7f6bb68e44cf984c85f6e88+3",
                        "servers":[
                                "server_1",
-        "server_2"
+                               "server_2"
                        ]
                }
        ]`)
@@ -238,15 +236,23 @@ func (s *PullWorkerTestSuite) TestPullWorker_invalid_data_manager_token(c *C) {
 }
 
 func performTest(testData PullWorkerTestData, c *C) {
+       KeepVM = MakeTestVolumeManager(2)
+       defer KeepVM.Close()
+
        RunTestPullWorker(c)
+       defer pullq.Close()
 
        currentTestData = testData
        testPullLists[testData.name] = testData.response_body
 
-       // Override GetContent to mock keepclient Get functionality
-       GetContent = func(signedLocator string, keepClient keepclient.KeepClient) (
-               reader io.ReadCloser, contentLength int64, url string, err error) {
+       processedPullLists := make(map[string]string)
 
+       // Override GetContent to mock keepclient Get functionality
+       defer func(orig func(string, *keepclient.KeepClient) (io.ReadCloser, int64, string, error)) {
+               GetContent = orig
+       }(GetContent)
+       GetContent = func(signedLocator string, keepClient *keepclient.KeepClient) (reader io.ReadCloser, contentLength int64, url string, err error) {
+               c.Assert(getStatusItem("PullQueue", "InProgress"), Equals, float64(1))
                processedPullLists[testData.name] = testData.response_body
                if testData.read_error {
                        err = errors.New("Error getting data")
@@ -262,6 +268,7 @@ func performTest(testData PullWorkerTestData, c *C) {
        }
 
        // Override PutContent to mock PutBlock functionality
+       defer func(orig func([]byte, string) error) { PutContent = orig }(PutContent)
        PutContent = func(content []byte, locator string) (err error) {
                if testData.put_error {
                        err = errors.New("Error putting data")
@@ -273,13 +280,17 @@ func performTest(testData PullWorkerTestData, c *C) {
                }
        }
 
-       response := IssueRequest(&testData.req)
-       c.Assert(testData.response_code, Equals, response.Code)
-       c.Assert(testData.response_body, Equals, response.Body.String())
+       c.Assert(getStatusItem("PullQueue", "InProgress"), Equals, float64(0))
+       c.Assert(getStatusItem("PullQueue", "Queued"), Equals, float64(0))
 
-       expectWorkerChannelEmpty(c, pullq.NextItem)
+       response := IssueRequest(&testData.req)
+       c.Assert(response.Code, Equals, testData.response_code)
+       c.Assert(response.Body.String(), Equals, testData.response_body)
 
-       pullq.Close()
+       expectEqualWithin(c, time.Second, 0, func() interface{} {
+               st := pullq.Status()
+               return st.InProgress + st.Queued
+       })
 
        if testData.name == "TestPullWorker_pull_list_with_two_items_latest_replacing_old" {
                c.Assert(len(testPullLists), Equals, 2)
@@ -310,6 +321,8 @@ func performTest(testData PullWorkerTestData, c *C) {
                        c.Assert(string(putContent), Equals, testData.read_content)
                }
        }
+
+       expectChannelEmpty(c, pullq.NextItem)
 }
 
 type ClosingBuffer struct {
@@ -319,19 +332,3 @@ type ClosingBuffer struct {
 func (cb *ClosingBuffer) Close() (err error) {
        return
 }
-
-func expectWorkerChannelEmpty(c *C, workerChannel <-chan interface{}) {
-       select {
-       case item := <-workerChannel:
-               c.Fatalf("Received value (%v) from channel that was expected to be empty", item)
-       default:
-       }
-}
-
-func expectWorkerChannelNotEmpty(c *C, workerChannel <-chan interface{}) {
-       select {
-       case item := <-workerChannel:
-               c.Fatalf("Received value (%v) from channel that was expected to be empty", item)
-       default:
-       }
-}
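
The removed helpers were also buggy: expectWorkerChannelNotEmpty was a verbatim copy of expectWorkerChannelEmpty and never checked for a non-empty channel. Their replacement, expectEqualWithin, polls because the workers drain their queues asynchronously. Its definition is elsewhere in the test suite; a plausible sketch, with the poll interval and the minimal interface assumed:

// Sketch of expectEqualWithin: poll f until it returns expect or the
// timeout elapses. The committed helper may differ.
type errorfer interface {
	Errorf(format string, args ...interface{})
}

func expectEqualWithin(t errorfer, timeout time.Duration, expect interface{}, f func() interface{}) {
	deadline := time.Now().Add(timeout)
	for time.Now().Before(deadline) {
		if f() == expect {
			return
		}
		time.Sleep(time.Millisecond)
	}
	t.Errorf("timed out waiting for %v; last value was %v", expect, f())
}

Both *testing.T and gocheck's *C satisfy this interface, which is why the pull worker and trash worker tests can share the helper.
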
diff --git a/services/keepstore/status_test.go b/services/keepstore/status_test.go
new file mode 100644 (file)
index 0000000..74de61e
--- /dev/null
@@ -0,0 +1,21 @@
+package main
+
+import (
+       "encoding/json"
+)
+
+// We don't have isolated unit tests for /status.json yet, but we do
+// check (e.g., in pull_worker_test.go) that /status.json reports
+// specific statistics correctly at the appropriate times.
+
+// getStatusItem("foo","bar","baz") retrieves /status.json, decodes
+// the response body, and returns status["foo"]["bar"]["baz"].
+func getStatusItem(keys ...string) interface{} {
+       resp := IssueRequest(&RequestTester{"/status.json", "", "GET", nil})
+       var s interface{}
+       json.NewDecoder(resp.Body).Decode(&s)
+       for _, k := range keys {
+               s = s.(map[string]interface{})[k]
+       }
+       return s
+}
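
One subtlety: encoding/json decodes every JSON number into float64 when the target is interface{}, which is why callers compare against float64 literals rather than ints. For example, pull_worker_test.go asserts:

	c.Assert(getStatusItem("PullQueue", "InProgress"), Equals, float64(1))
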
diff --git a/services/keepstore/trash_worker.go b/services/keepstore/trash_worker.go
new file mode 100644 (file)
index 0000000..8f78658
--- /dev/null
@@ -0,0 +1,61 @@
+package main
+
+import (
+       "errors"
+       "log"
+       "time"
+)
+
+/*
+       Keepstore starts the trash worker goroutine, which consumes
+       the trash list: for each trash request, delete the block
+       indicated by the request's Locator, then move on to the next
+       request.
+*/
+
+func RunTrashWorker(trashq *WorkQueue) {
+       for item := range trashq.NextItem {
+               trashRequest := item.(TrashRequest)
+               TrashItem(trashRequest)
+               trashq.DoneItem <- struct{}{}
+       }
+}
+
+// TrashItem deletes the indicated block from every writable volume.
+func TrashItem(trashRequest TrashRequest) {
+       reqMtime := time.Unix(trashRequest.BlockMtime, 0)
+       if time.Since(reqMtime) < blob_signature_ttl {
+               log.Printf("WARNING: data manager asked to delete a %v old block %v (BlockMtime %d = %v), but my blob_signature_ttl is %v! Skipping.",
+                       time.Since(reqMtime),
+                       trashRequest.Locator,
+                       trashRequest.BlockMtime,
+                       reqMtime,
+                       blob_signature_ttl)
+               return
+       }
+
+       for _, volume := range KeepVM.AllWritable() {
+               mtime, err := volume.Mtime(trashRequest.Locator)
+               if err != nil {
+                       log.Printf("%v Delete(%v): %v", volume, trashRequest.Locator, err)
+                       continue
+               }
+               if trashRequest.BlockMtime != mtime.Unix() {
+                       log.Printf("%v Delete(%v): mtime on volume is %v does not match trash list value %v", volume, trashRequest.Locator, mtime.Unix(), trashRequest.BlockMtime)
+                       continue
+               }
+
+               if never_delete {
+                       err = errors.New("did not delete block because never_delete is true")
+               } else {
+                       err = volume.Delete(trashRequest.Locator)
+               }
+
+               if err != nil {
+                       log.Printf("%v Delete(%v): %v", volume, trashRequest.Locator, err)
+               } else {
+                       log.Printf("%v Delete(%v) OK", volume, trashRequest.Locator)
+               }
+       }
+}
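
RunTrashWorker follows the same contract as RunPullWorker: receive on NextItem, do the work, then send on DoneItem so that Status() stays accurate; an item counts as in progress from receipt until the DoneItem send, which the tests verify via /status.json. The WorkQueue type itself is not in this diff; a minimal sketch of that contract (Close omitted, internal channel names assumed, container/list imported):

// Sketch of the WorkQueue contract; not the committed implementation.
type WorkQueueStatus struct {
	Queued     int
	InProgress int
}

type WorkQueue struct {
	NextItem chan interface{} // workers receive work here
	DoneItem chan struct{}    // workers signal completion here
	newlist  chan *list.List  // ReplaceQueue hands over a fresh list
	status   chan WorkQueueStatus
}

func NewWorkQueue() *WorkQueue {
	q := &WorkQueue{
		NextItem: make(chan interface{}),
		DoneItem: make(chan struct{}),
		newlist:  make(chan *list.List),
		status:   make(chan WorkQueueStatus),
	}
	go func() {
		var st WorkQueueStatus
		todo := list.New()
		for {
			// Offer NextItem only when there is queued work:
			// sending on a nil channel blocks forever, so that
			// select case is disabled while todo is empty.
			var out chan interface{}
			var next interface{}
			if e := todo.Front(); e != nil {
				out, next = q.NextItem, e.Value
			}
			select {
			case l := <-q.newlist:
				todo, st.Queued = l, l.Len()
			case out <- next:
				todo.Remove(todo.Front())
				st.Queued--
				st.InProgress++
			case <-q.DoneItem:
				st.InProgress--
			case q.status <- st:
				// Status() reader gets a consistent snapshot.
			}
		}
	}()
	return q
}

func (q *WorkQueue) ReplaceQueue(l *list.List) { q.newlist <- l }
func (q *WorkQueue) Status() WorkQueueStatus   { return <-q.status }

Note that ReplaceQueue resets Queued but leaves InProgress alone, matching the "latest list replacing old" behavior the pull worker tests rely on.
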
diff --git a/services/keepstore/trash_worker_test.go b/services/keepstore/trash_worker_test.go
new file mode 100644 (file)
index 0000000..40b291e
--- /dev/null
@@ -0,0 +1,332 @@
+package main
+
+import (
+       "container/list"
+       "testing"
+       "time"
+)
+
+type TrashWorkerTestData struct {
+       Locator1    string
+       Block1      []byte
+       BlockMtime1 int64
+
+       Locator2    string
+       Block2      []byte
+       BlockMtime2 int64
+
+       CreateData      bool
+       CreateInVolume1 bool
+
+       UseTrashLifeTime bool
+       DifferentMtimes  bool
+
+       DeleteLocator string
+
+       ExpectLocator1 bool
+       ExpectLocator2 bool
+}
+
+/* Delete block that does not exist in any of the keep volumes.
+   Expect no errors.
+*/
+func TestTrashWorkerIntegration_GetNonExistingLocator(t *testing.T) {
+       never_delete = false
+       testData := TrashWorkerTestData{
+               Locator1: "5d41402abc4b2a76b9719d911017c592",
+               Block1:   []byte("hello"),
+
+               Locator2: "5d41402abc4b2a76b9719d911017c592",
+               Block2:   []byte("hello"),
+
+               CreateData: false,
+
+               DeleteLocator: "5d41402abc4b2a76b9719d911017c592",
+
+               ExpectLocator1: false,
+               ExpectLocator2: false,
+       }
+       performTrashWorkerTest(testData, t)
+}
+
+/* Delete a block that exists on volume 1 of the keep servers.
+   Expect the second locator in volume 2 to be unaffected.
+*/
+func TestTrashWorkerIntegration_LocatorInVolume1(t *testing.T) {
+       never_delete = false
+       testData := TrashWorkerTestData{
+               Locator1: TEST_HASH,
+               Block1:   TEST_BLOCK,
+
+               Locator2: TEST_HASH_2,
+               Block2:   TEST_BLOCK_2,
+
+               CreateData: true,
+
+               DeleteLocator: TEST_HASH, // first locator
+
+               ExpectLocator1: false,
+               ExpectLocator2: true,
+       }
+       performTrashWorkerTest(testData, t)
+}
+
+/* Delete a block that exists on volume 2 of the keep servers.
+   Expect the first locator in volume 1 to be unaffected.
+*/
+func TestTrashWorkerIntegration_LocatorInVolume2(t *testing.T) {
+       never_delete = false
+       testData := TrashWorkerTestData{
+               Locator1: TEST_HASH,
+               Block1:   TEST_BLOCK,
+
+               Locator2: TEST_HASH_2,
+               Block2:   TEST_BLOCK_2,
+
+               CreateData: true,
+
+               DeleteLocator: TEST_HASH_2, // locator 2
+
+               ExpectLocator1: true,
+               ExpectLocator2: false,
+       }
+       performTrashWorkerTest(testData, t)
+}
+
+/* Delete a block with matching mtime for locator in both volumes.
+   Expect locator to be deleted from both volumes.
+*/
+func TestTrashWorkerIntegration_LocatorInBothVolumes(t *testing.T) {
+       never_delete = false
+       testData := TrashWorkerTestData{
+               Locator1: TEST_HASH,
+               Block1:   TEST_BLOCK,
+
+               Locator2: TEST_HASH,
+               Block2:   TEST_BLOCK,
+
+               CreateData: true,
+
+               DeleteLocator: TEST_HASH,
+
+               ExpectLocator1: false,
+               ExpectLocator2: false,
+       }
+       performTrashWorkerTest(testData, t)
+}
+
+/* Same locator with different Mtimes exists in both volumes.
+   Delete the second and expect the first to be still around.
+*/
+func TestTrashWorkerIntegration_MtimeMatchesForLocator1ButNotForLocator2(t *testing.T) {
+       never_delete = false
+       testData := TrashWorkerTestData{
+               Locator1: TEST_HASH,
+               Block1:   TEST_BLOCK,
+
+               Locator2: TEST_HASH,
+               Block2:   TEST_BLOCK,
+
+               CreateData:      true,
+               DifferentMtimes: true,
+
+               DeleteLocator: TEST_HASH,
+
+               ExpectLocator1: true,
+               ExpectLocator2: false,
+       }
+       performTrashWorkerTest(testData, t)
+}
+
+/* Two different locators in volume 1.
+   Delete one of them.
+   Expect the other unaffected.
+*/
+func TestTrashWorkerIntegration_TwoDifferentLocatorsInVolume1(t *testing.T) {
+       never_delete = false
+       testData := TrashWorkerTestData{
+               Locator1: TEST_HASH,
+               Block1:   TEST_BLOCK,
+
+               Locator2: TEST_HASH_2,
+               Block2:   TEST_BLOCK_2,
+
+               CreateData:      true,
+               CreateInVolume1: true,
+
+               DeleteLocator: TEST_HASH, // locator 1
+
+               ExpectLocator1: false,
+               ExpectLocator2: true,
+       }
+       performTrashWorkerTest(testData, t)
+}
+
+/* Allow the default trash lifetime to be used. The newly created block
+   will not be deleted because its Mtime is within the trash lifetime.
+*/
+func TestTrashWorkerIntegration_SameLocatorInTwoVolumesWithDefaultTrashLifeTime(t *testing.T) {
+       never_delete = false
+       testData := TrashWorkerTestData{
+               Locator1: TEST_HASH,
+               Block1:   TEST_BLOCK,
+
+               Locator2: TEST_HASH_2,
+               Block2:   TEST_BLOCK_2,
+
+               CreateData:      true,
+               CreateInVolume1: true,
+
+               UseTrashLifeTime: true,
+
+               DeleteLocator: TEST_HASH, // locator 1
+
+               // Since the trash lifetime is in effect, the block won't be deleted.
+               ExpectLocator1: true,
+               ExpectLocator2: true,
+       }
+       performTrashWorkerTest(testData, t)
+}
+
+/* Delete a block with matching mtime for locator in both volumes, but never_delete is true,
+   so block won't be deleted.
+*/
+func TestTrashWorkerIntegration_NeverDelete(t *testing.T) {
+       never_delete = true
+       testData := TrashWorkerTestData{
+               Locator1: TEST_HASH,
+               Block1:   TEST_BLOCK,
+
+               Locator2: TEST_HASH,
+               Block2:   TEST_BLOCK,
+
+               CreateData: true,
+
+               DeleteLocator: TEST_HASH,
+
+               ExpectLocator1: true,
+               ExpectLocator2: true,
+       }
+       performTrashWorkerTest(testData, t)
+}
+
+/* Run the trash worker against two test volumes and verify that the
+   expected blocks were (or were not) deleted. */
+func performTrashWorkerTest(testData TrashWorkerTestData, t *testing.T) {
+       // Create Keep Volumes
+       KeepVM = MakeTestVolumeManager(2)
+       defer KeepVM.Close()
+
+       // Put test content
+       vols := KeepVM.AllWritable()
+       if testData.CreateData {
+               vols[0].Put(testData.Locator1, testData.Block1)
+               vols[0].Put(testData.Locator1+".meta", []byte("metadata"))
+
+               if testData.CreateInVolume1 {
+                       vols[0].Put(testData.Locator2, testData.Block2)
+                       vols[0].Put(testData.Locator2+".meta", []byte("metadata"))
+               } else {
+                       vols[1].Put(testData.Locator2, testData.Block2)
+                       vols[1].Put(testData.Locator2+".meta", []byte("metadata"))
+               }
+       }
+
+       oldBlockTime := time.Now().Add(-blob_signature_ttl - time.Minute)
+
+       // Create TrashRequest for the test
+       trashRequest := TrashRequest{
+               Locator:    testData.DeleteLocator,
+               BlockMtime: oldBlockTime.Unix(),
+       }
+
+       // Run trash worker and put the trashRequest on trashq
+       trashList := list.New()
+       trashList.PushBack(trashRequest)
+       trashq = NewWorkQueue()
+       defer trashq.Close()
+
+       if !testData.UseTrashLifeTime {
+               // The trash worker will not delete a block whose
+               // Mtime is within the trash lifetime. Back-date the
+               // block so the deletion can succeed.
+               for _, v := range vols {
+                       v.(*MockVolume).Timestamps[testData.DeleteLocator] = oldBlockTime
+                       if testData.DifferentMtimes {
+                               oldBlockTime = oldBlockTime.Add(time.Second)
+                       }
+               }
+       }
+       go RunTrashWorker(trashq)
+
+       // Install gate so all local operations block until we say go
+       gate := make(chan struct{})
+       for _, v := range vols {
+               v.(*MockVolume).Gate = gate
+       }
+
+       assertStatusItem := func(k string, expect float64) {
+               if v := getStatusItem("TrashQueue", k); v != expect {
+                       t.Errorf("Got %s %v, expected %v", k, v, expect)
+               }
+       }
+
+       assertStatusItem("InProgress", 0)
+       assertStatusItem("Queued", 0)
+
+       listLen := trashList.Len()
+       trashq.ReplaceQueue(trashList)
+
+       // Wait for worker to take request(s)
+       expectEqualWithin(t, time.Second, listLen, func() interface{} { return trashq.Status().InProgress })
+
+       // Ensure status.json also reports work is happening
+       assertStatusItem("InProgress", float64(1))
+       assertStatusItem("Queued", float64(listLen-1))
+
+       // Let worker proceed
+       close(gate)
+
+       // Wait for worker to finish
+       expectEqualWithin(t, time.Second, 0, func() interface{} { return trashq.Status().InProgress })
+
+       // Verify Locator1 to be un/deleted as expected
+       data, _ := GetBlock(testData.Locator1, false)
+       if testData.ExpectLocator1 {
+               if len(data) == 0 {
+                       t.Errorf("Expected Locator1 to be still present: %s", testData.Locator1)
+               }
+       } else {
+               if len(data) > 0 {
+                       t.Errorf("Expected Locator1 to be deleted: %s", testData.Locator1)
+               }
+       }
+
+       // Verify Locator2 to be un/deleted as expected
+       if testData.Locator1 != testData.Locator2 {
+               data, _ = GetBlock(testData.Locator2, false)
+               if testData.ExpectLocator2 {
+                       if len(data) == 0 {
+                               t.Errorf("Expected Locator2 to be still present: %s", testData.Locator2)
+                       }
+               } else {
+                       if len(data) > 0 {
+                               t.Errorf("Expected Locator2 to be deleted: %s", testData.Locator2)
+                       }
+               }
+       }
+
+       // The DifferentMtimes test puts the same locator in two
+       // different volumes, but only one copy has an Mtime matching
+       // the trash request.
+       if testData.DifferentMtimes {
+               locatorFoundIn := 0
+               for _, volume := range KeepVM.AllReadable() {
+                       if _, err := volume.Get(testData.Locator1); err == nil {
+                               locatorFoundIn++
+                       }
+               }
+               if locatorFoundIn != 1 {
+                       t.Errorf("Found %d copies of %s, expected 1", locatorFoundIn, testData.Locator1)
+               }
+       }
+}
index e7683ee991a41ac8a8f45cfa9f5f263e2a7a42a6..64fea34bfe1c32ad9b6b6b33a74c82f8b9f0252f 100644 (file)
 package main
 
 import (
-       "errors"
-       "fmt"
-       "os"
-       "strings"
+       "io"
+       "sync/atomic"
        "time"
 )
 
 type Volume interface {
+       // Get a block. IFF the returned error is nil, the caller must
+       // put the returned slice back into the buffer pool when it's
+       // finished with it.
        Get(loc string) ([]byte, error)
        Put(loc string, block []byte) error
        Touch(loc string) error
        Mtime(loc string) (time.Time, error)
-       Index(prefix string) string
+       IndexTo(prefix string, writer io.Writer) error
        Delete(loc string) error
        Status() *VolumeStatus
        String() string
+       Writable() bool
 }
 
-// MockVolumes are Volumes used to test the Keep front end.
-//
-// If the Bad field is true, this volume should return an error
-// on all writes and puts.
-//
-// The Touchable field signifies whether the Touch method will
-// succeed.  Defaults to true.  Note that Bad and Touchable are
-// independent: a MockVolume may be set up so that Put fails but Touch
-// works or vice versa.
-//
-// TODO(twp): rename Bad to something more descriptive, e.g. Writable,
-// and make sure that the tests that rely on it are testing the right
-// thing.  We may need to simulate Writable, Touchable and Corrupt
-// volumes in different ways.
-//
-type MockVolume struct {
-       Store      map[string][]byte
-       Timestamps map[string]time.Time
-       Bad        bool
-       Touchable  bool
-}
-
-func CreateMockVolume() *MockVolume {
-       return &MockVolume{
-               Store:      make(map[string][]byte),
-               Timestamps: make(map[string]time.Time),
-               Bad:        false,
-               Touchable:  true,
-       }
-}
-
-func (v *MockVolume) Get(loc string) ([]byte, error) {
-       if v.Bad {
-               return nil, errors.New("Bad volume")
-       } else if block, ok := v.Store[loc]; ok {
-               return block, nil
-       }
-       return nil, os.ErrNotExist
+// A VolumeManager tells callers which volumes can read, which volumes
+// can write, and on which volume the next write should be attempted.
+type VolumeManager interface {
+       // AllReadable returns all volumes.
+       AllReadable() []Volume
+       // AllWritable returns all volumes that aren't known to be in
+       // a read-only state. (There is no guarantee that a write to
+       // one will succeed, though.)
+       AllWritable() []Volume
+       // NextWritable returns the volume where the next new block
+       // should be written. A VolumeManager can select a volume in
+       // order to distribute activity across spindles, fill up disks
+       // with more free space, etc.
+       NextWritable() Volume
+       // Close shuts down the volume manager cleanly.
+       Close()
 }
 
-func (v *MockVolume) Put(loc string, block []byte) error {
-       if v.Bad {
-               return errors.New("Bad volume")
-       }
-       v.Store[loc] = block
-       return v.Touch(loc)
+type RRVolumeManager struct {
+       readables []Volume
+       writables []Volume
+       counter   uint32
 }
 
-func (v *MockVolume) Touch(loc string) error {
-       if v.Touchable {
-               v.Timestamps[loc] = time.Now()
-               return nil
+func MakeRRVolumeManager(volumes []Volume) *RRVolumeManager {
+       vm := &RRVolumeManager{}
+       for _, v := range volumes {
+               vm.readables = append(vm.readables, v)
+               if v.Writable() {
+                       vm.writables = append(vm.writables, v)
+               }
        }
-       return errors.New("Touch failed")
+       return vm
 }
 
-func (v *MockVolume) Mtime(loc string) (time.Time, error) {
-       var mtime time.Time
-       var err error
-       if v.Bad {
-               err = errors.New("Bad volume")
-       } else if t, ok := v.Timestamps[loc]; ok {
-               mtime = t
-       } else {
-               err = os.ErrNotExist
-       }
-       return mtime, err
+func (vm *RRVolumeManager) AllReadable() []Volume {
+       return vm.readables
 }
 
-func (v *MockVolume) Index(prefix string) string {
-       var result string
-       for loc, block := range v.Store {
-               if IsValidLocator(loc) && strings.HasPrefix(loc, prefix) {
-                       result = result + fmt.Sprintf("%s+%d %d\n",
-                               loc, len(block), 123456789)
-               }
-       }
-       return result
+func (vm *RRVolumeManager) AllWritable() []Volume {
+       return vm.writables
 }
 
-func (v *MockVolume) Delete(loc string) error {
-       if _, ok := v.Store[loc]; ok {
-               if time.Since(v.Timestamps[loc]) < permission_ttl {
-                       return nil
-               }
-               delete(v.Store, loc)
+func (vm *RRVolumeManager) NextWritable() Volume {
+       if len(vm.writables) == 0 {
                return nil
        }
-       return os.ErrNotExist
-}
-
-func (v *MockVolume) Status() *VolumeStatus {
-       var used uint64
-       for _, block := range v.Store {
-               used = used + uint64(len(block))
-       }
-       return &VolumeStatus{"/bogo", 123, 1000000 - used, used}
-}
-
-func (v *MockVolume) String() string {
-       return "[MockVolume]"
-}
-
-// A VolumeManager manages a collection of volumes.
-//
-// - Volumes is a slice of available Volumes.
-// - Choose() returns a Volume suitable for writing to.
-// - Quit() instructs the VolumeManager to shut down gracefully.
-//
-type VolumeManager interface {
-       Volumes() []Volume
-       Choose() Volume
-       Quit()
-}
-
-type RRVolumeManager struct {
-       volumes   []Volume
-       nextwrite chan Volume
-       quit      chan int
-}
-
-func MakeRRVolumeManager(vols []Volume) *RRVolumeManager {
-       // Create a new VolumeManager struct with the specified volumes,
-       // and with new Nextwrite and Quit channels.
-       // The Quit channel is buffered with a capacity of 1 so that
-       // another routine may write to it without blocking.
-       vm := &RRVolumeManager{vols, make(chan Volume), make(chan int, 1)}
-
-       // This goroutine implements round-robin volume selection.
-       // It sends each available Volume in turn to the Nextwrite
-       // channel, until receiving a notification on the Quit channel
-       // that it should terminate.
-       go func() {
-               var i int = 0
-               for {
-                       select {
-                       case <-vm.quit:
-                               return
-                       case vm.nextwrite <- vm.volumes[i]:
-                               i = (i + 1) % len(vm.volumes)
-                       }
-               }
-       }()
-
-       return vm
-}
-
-func (vm *RRVolumeManager) Volumes() []Volume {
-       return vm.volumes
-}
-
-func (vm *RRVolumeManager) Choose() Volume {
-       return <-vm.nextwrite
+       i := atomic.AddUint32(&vm.counter, 1)
+       return vm.writables[i%uint32(len(vm.writables))]
 }
 
-func (vm *RRVolumeManager) Quit() {
-       vm.quit <- 1
+func (vm *RRVolumeManager) Close() {
 }
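
The goroutine-and-channels round-robin of the old implementation is replaced by a single atomic counter: NextWritable is lock-free, safe for concurrent callers, and Close has nothing left to tear down. An illustrative fragment (v0..v2 are hypothetical writable Volumes, not fixtures from this commit):

    vm := MakeRRVolumeManager([]Volume{v0, v1, v2})
    counts := map[Volume]int{}
    for i := 0; i < 6; i++ {
            counts[vm.NextWritable()]++
    }
    // Each volume is picked exactly twice: the shared counter rotates
    // writes across volumes, even when called from many goroutines.
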
diff --git a/services/keepstore/volume_test.go b/services/keepstore/volume_test.go
new file mode 100644 (file)
index 0000000..c5a7491
--- /dev/null
@@ -0,0 +1,174 @@
+package main
+
+import (
+       "errors"
+       "fmt"
+       "io"
+       "os"
+       "strings"
+       "sync"
+       "time"
+)
+
+// MockVolumes are test doubles for Volumes, used to test handlers.
+type MockVolume struct {
+       Store      map[string][]byte
+       Timestamps map[string]time.Time
+       // Bad volumes return an error for every operation.
+       Bad bool
+       // Touchable volumes' Touch() method succeeds for a locator
+       // that has been Put().
+       Touchable bool
+       // Readonly volumes return an error for Put, Delete, and
+       // Touch.
+       Readonly bool
+       // Gate is a "starting gate", allowing test cases to pause
+       // volume operations long enough to inspect state. Every
+       // operation (except Status) starts by receiving from
+       // Gate. Sending one value unblocks one operation; closing the
+       // channel unblocks all operations. By default, Gate is a
+       // closed channel, so all operations proceed without
+       // blocking. See trash_worker_test.go for an example.
+       Gate   chan struct{}
+       called map[string]int
+       mutex  sync.Mutex
+}
+
+// CreateMockVolume returns a non-Bad, non-Readonly, Touchable mock
+// volume.
+func CreateMockVolume() *MockVolume {
+       gate := make(chan struct{})
+       close(gate)
+       return &MockVolume{
+               Store:      make(map[string][]byte),
+               Timestamps: make(map[string]time.Time),
+               Bad:        false,
+               Touchable:  true,
+               Readonly:   false,
+               called:     map[string]int{},
+               Gate:       gate,
+       }
+}
+
+// CallCount returns how many times the named method has been called.
+func (v *MockVolume) CallCount(method string) int {
+       v.mutex.Lock()
+       defer v.mutex.Unlock()
+       if c, ok := v.called[method]; !ok {
+               return 0
+       } else {
+               return c
+       }
+}
+
+func (v *MockVolume) gotCall(method string) {
+       v.mutex.Lock()
+       defer v.mutex.Unlock()
+       if _, ok := v.called[method]; !ok {
+               v.called[method] = 1
+       } else {
+               v.called[method]++
+       }
+}
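
CallCount and gotCall give tests a mutex-protected way to assert how often a handler touched the volume. A hypothetical test fragment (TEST_HASH is a fixture from the surrounding suite):

    v := CreateMockVolume()
    v.Get(TEST_HASH)
    v.Get(TEST_HASH)
    if n := v.CallCount("Get"); n != 2 {
            t.Errorf("expected 2 Get calls, got %d", n)
    }
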
+
+func (v *MockVolume) Get(loc string) ([]byte, error) {
+       v.gotCall("Get")
+       <-v.Gate
+       if v.Bad {
+               return nil, errors.New("Bad volume")
+       } else if block, ok := v.Store[loc]; ok {
+               buf := bufs.Get(len(block))
+               copy(buf, block)
+               return buf, nil
+       }
+       return nil, os.ErrNotExist
+}
+
+func (v *MockVolume) Put(loc string, block []byte) error {
+       v.gotCall("Put")
+       <-v.Gate
+       if v.Bad {
+               return errors.New("Bad volume")
+       }
+       if v.Readonly {
+               return MethodDisabledError
+       }
+       v.Store[loc] = block
+       return v.Touch(loc)
+}
+
+func (v *MockVolume) Touch(loc string) error {
+       v.gotCall("Touch")
+       <-v.Gate
+       if v.Readonly {
+               return MethodDisabledError
+       }
+       if v.Touchable {
+               v.Timestamps[loc] = time.Now()
+               return nil
+       }
+       return errors.New("Touch failed")
+}
+
+func (v *MockVolume) Mtime(loc string) (time.Time, error) {
+       v.gotCall("Mtime")
+       <-v.Gate
+       var mtime time.Time
+       var err error
+       if v.Bad {
+               err = errors.New("Bad volume")
+       } else if t, ok := v.Timestamps[loc]; ok {
+               mtime = t
+       } else {
+               err = os.ErrNotExist
+       }
+       return mtime, err
+}
+
+func (v *MockVolume) IndexTo(prefix string, w io.Writer) error {
+       v.gotCall("IndexTo")
+       <-v.Gate
+       for loc, block := range v.Store {
+               if !IsValidLocator(loc) || !strings.HasPrefix(loc, prefix) {
+                       continue
+               }
+               _, err := fmt.Fprintf(w, "%s+%d %d\n",
+                       loc, len(block), 123456789)
+               if err != nil {
+                       return err
+               }
+       }
+       return nil
+}
+
+func (v *MockVolume) Delete(loc string) error {
+       v.gotCall("Delete")
+       <-v.Gate
+       if v.Readonly {
+               return MethodDisabledError
+       }
+       if _, ok := v.Store[loc]; ok {
+               if time.Since(v.Timestamps[loc]) < blob_signature_ttl {
+                       return nil
+               }
+               delete(v.Store, loc)
+               return nil
+       }
+       return os.ErrNotExist
+}
+
+func (v *MockVolume) Status() *VolumeStatus {
+       var used uint64
+       for _, block := range v.Store {
+               used = used + uint64(len(block))
+       }
+       return &VolumeStatus{"/bogo", 123, 1000000 - used, used}
+}
+
+func (v *MockVolume) String() string {
+       return "[MockVolume]"
+}
+
+func (v *MockVolume) Writable() bool {
+       return !v.Readonly
+}
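
A sketch of the "starting gate" pattern documented on the Gate field, loosely modeled on the trash_worker_test.go usage the comment cites (TEST_HASH and TEST_BLOCK are suite fixtures):

    v := CreateMockVolume()
    v.Gate = make(chan struct{}) // open, unbuffered: operations now block on entry
    done := make(chan struct{})
    go func() {
            v.Put(TEST_HASH, TEST_BLOCK) // parks at <-v.Gate
            close(done)
    }()
    // ...inspect intermediate state while the Put is suspended...
    close(v.Gate) // release this and every later operation
    <-done
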
index 4db2a5338400af7aa8c31089df63cf8cf71c502e..a7ad6f9e499c80439c27cb1beed33060674ed776 100644 (file)
@@ -4,106 +4,41 @@ package main
 
 import (
        "fmt"
+       "io"
        "io/ioutil"
        "log"
        "os"
        "path/filepath"
+       "regexp"
        "strconv"
        "strings"
+       "sync"
        "syscall"
        "time"
 )
 
-// IORequests are encapsulated Get or Put requests.  They are used to
-// implement serialized I/O (i.e. only one read/write operation per
-// volume). When running in serialized mode, the Keep front end sends
-// IORequests on a channel to an IORunner, which handles them one at a
-// time and returns an IOResponse.
-//
-type IOMethod int
-
-const (
-       KeepGet IOMethod = iota
-       KeepPut
-)
-
-type IORequest struct {
-       method IOMethod
-       loc    string
-       data   []byte
-       reply  chan *IOResponse
-}
-
-type IOResponse struct {
-       data []byte
-       err  error
-}
-
-// A UnixVolume has the following properties:
-//
-//   root
-//       the path to the volume's root directory
-//   queue
-//       A channel of IORequests. If non-nil, all I/O requests for
-//       this volume should be queued on this channel; the result
-//       will be delivered on the IOResponse channel supplied in the
-//       request.
-//
+// A UnixVolume stores and retrieves blocks in a local directory.
 type UnixVolume struct {
-       root  string // path to this volume
-       queue chan *IORequest
-}
-
-func (v *UnixVolume) IOHandler() {
-       for req := range v.queue {
-               var result IOResponse
-               switch req.method {
-               case KeepGet:
-                       result.data, result.err = v.Read(req.loc)
-               case KeepPut:
-                       result.err = v.Write(req.loc, req.data)
-               }
-               req.reply <- &result
-       }
-}
-
-func MakeUnixVolume(root string, serialize bool) (v UnixVolume) {
-       if serialize {
-               v = UnixVolume{root, make(chan *IORequest)}
-               go v.IOHandler()
-       } else {
-               v = UnixVolume{root, nil}
-       }
-       return
-}
-
-func (v *UnixVolume) Get(loc string) ([]byte, error) {
-       if v.queue == nil {
-               return v.Read(loc)
-       }
-       reply := make(chan *IOResponse)
-       v.queue <- &IORequest{KeepGet, loc, nil, reply}
-       response := <-reply
-       return response.data, response.err
-}
-
-func (v *UnixVolume) Put(loc string, block []byte) error {
-       if v.queue == nil {
-               return v.Write(loc, block)
-       }
-       reply := make(chan *IOResponse)
-       v.queue <- &IORequest{KeepPut, loc, block, reply}
-       response := <-reply
-       return response.err
+       root      string // path to the volume's root directory
+       serialize bool
+       readonly  bool
+       mutex     sync.Mutex
 }
 
 func (v *UnixVolume) Touch(loc string) error {
+       if v.readonly {
+               return MethodDisabledError
+       }
        p := v.blockPath(loc)
        f, err := os.OpenFile(p, os.O_RDWR|os.O_APPEND, 0644)
        if err != nil {
                return err
        }
        defer f.Close()
+       if v.serialize {
+               v.mutex.Lock()
+               defer v.mutex.Unlock()
+       }
        if e := lockfile(f); e != nil {
                return e
        }
@@ -122,28 +57,50 @@ func (v *UnixVolume) Mtime(loc string) (time.Time, error) {
        }
 }
 
-// Read retrieves a block identified by the locator string "loc", and
+// Get retrieves a block identified by the locator string "loc", and
 // returns its contents as a byte slice.
 //
-// If the block could not be opened or read, Read returns a nil slice
-// and the os.Error that was generated.
-//
-// If the block is present but its content hash does not match loc,
-// Read returns the block and a CorruptError.  It is the caller's
-// responsibility to decide what (if anything) to do with the
-// corrupted data block.
-//
-func (v *UnixVolume) Read(loc string) ([]byte, error) {
-       buf, err := ioutil.ReadFile(v.blockPath(loc))
-       return buf, err
+// If the block could not be found, opened, or read, Get returns a nil
+// slice and whatever non-nil error was returned by Stat or ReadFile.
+func (v *UnixVolume) Get(loc string) ([]byte, error) {
+       path := v.blockPath(loc)
+       stat, err := os.Stat(path)
+       if err != nil {
+               return nil, err
+       }
+       if stat.Size() < 0 {
+               return nil, os.ErrInvalid
+       } else if stat.Size() == 0 {
+               return bufs.Get(0), nil
+       } else if stat.Size() > BLOCKSIZE {
+               return nil, TooLongError
+       }
+       f, err := os.Open(path)
+       if err != nil {
+               return nil, err
+       }
+       defer f.Close()
+       buf := bufs.Get(int(stat.Size()))
+       if v.serialize {
+               v.mutex.Lock()
+               defer v.mutex.Unlock()
+       }
+       _, err = io.ReadFull(f, buf)
+       if err != nil {
+               bufs.Put(buf)
+               return nil, err
+       }
+       return buf, nil
 }
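
Caller-side sketch of the buffer-pool contract implied here: the returned slice comes from keepstore's shared pool ("bufs", defined elsewhere in the package), so the caller is expected to hand it back when finished (w is assumed to be an io.Writer in scope):

    buf, err := v.Get(loc)
    if err != nil {
            return err
    }
    defer bufs.Put(buf) // recycle the pooled buffer
    _, err = w.Write(buf)
    return err
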
 
-// Write stores a block of data identified by the locator string
+// Put stores a block of data identified by the locator string
 // "loc".  It returns nil on success.  If the volume is full, it
 // returns a FullError.  If the write fails due to some other error,
 // that error is returned.
-//
-func (v *UnixVolume) Write(loc string, block []byte) error {
+func (v *UnixVolume) Put(loc string, block []byte) error {
+       if v.readonly {
+               return MethodDisabledError
+       }
        if v.IsFull() {
                return FullError
        }
@@ -161,8 +118,14 @@ func (v *UnixVolume) Write(loc string, block []byte) error {
        }
        bpath := v.blockPath(loc)
 
+       if v.serialize {
+               v.mutex.Lock()
+               defer v.mutex.Unlock()
+       }
        if _, err := tmpfile.Write(block); err != nil {
                log.Printf("%s: writing to %s: %s\n", v, bpath, err)
+               tmpfile.Close()
+               os.Remove(tmpfile.Name())
                return err
        }
        if err := tmpfile.Close(); err != nil {
@@ -179,7 +142,7 @@ func (v *UnixVolume) Write(loc string, block []byte) error {
 }
 
 // Status returns a VolumeStatus struct describing the volume's
-// current state.
+// current state, or nil if an error occurs.
 //
 func (v *UnixVolume) Status() *VolumeStatus {
        var fs syscall.Statfs_t
@@ -205,14 +168,15 @@ func (v *UnixVolume) Status() *VolumeStatus {
        return &VolumeStatus{v.root, devnum, free, used}
 }
 
-// Index returns a list of blocks found on this volume which begin with
-// the specified prefix. If the prefix is an empty string, Index returns
-// a complete list of blocks.
+var blockDirRe = regexp.MustCompile(`^[0-9a-f]+$`)
+
+// IndexTo writes (to the given Writer) a list of blocks found on this
+// volume which begin with the specified prefix. If the prefix is an
+// empty string, IndexTo writes a complete list of blocks.
 //
-// The return value is a multiline string (separated by
-// newlines). Each line is in the format
+// Each block is given in the format
 //
-//     locator+size modification-time
+//     locator+size modification-time {newline}
 //
 // e.g.:
 //
@@ -220,41 +184,73 @@ func (v *UnixVolume) Status() *VolumeStatus {
 //     e4d41e6fd68460e0e3fc18cc746959d2+67108864 1377796043
 //     e4de7a2810f5554cd39b36d8ddb132ff+67108864 1388701136
 //
-func (v *UnixVolume) Index(prefix string) (output string) {
-       filepath.Walk(v.root,
-               func(path string, info os.FileInfo, err error) error {
-                       // This WalkFunc inspects each path in the volume
-                       // and prints an index line for all files that begin
-                       // with prefix.
-                       if err != nil {
-                               log.Printf("IndexHandler: %s: walking to %s: %s",
-                                       v, path, err)
-                               return nil
-                       }
-                       locator := filepath.Base(path)
-                       // Skip directories that do not match prefix.
-                       // We know there is nothing interesting inside.
-                       if info.IsDir() &&
-                               !strings.HasPrefix(locator, prefix) &&
-                               !strings.HasPrefix(prefix, locator) {
-                               return filepath.SkipDir
-                       }
-                       // Skip any file that is not apparently a locator, e.g. .meta files
-                       if !IsValidLocator(locator) {
-                               return nil
+func (v *UnixVolume) IndexTo(prefix string, w io.Writer) error {
+       var lastErr error = nil
+       rootdir, err := os.Open(v.root)
+       if err != nil {
+               return err
+       }
+       defer rootdir.Close()
+       for {
+               names, err := rootdir.Readdirnames(1)
+               if err == io.EOF {
+                       return lastErr
+               } else if err != nil {
+                       return err
+               }
+               if !strings.HasPrefix(names[0], prefix) && !strings.HasPrefix(prefix, names[0]) {
+                       // prefix excludes all blocks stored in this dir
+                       continue
+               }
+               if !blockDirRe.MatchString(names[0]) {
+                       continue
+               }
+               blockdirpath := filepath.Join(v.root, names[0])
+               blockdir, err := os.Open(blockdirpath)
+               if err != nil {
+                       log.Print("Error reading ", blockdirpath, ": ", err)
+                       lastErr = err
+                       continue
+               }
+               for {
+                       fileInfo, err := blockdir.Readdir(1)
+                       if err == io.EOF {
+                               break
+                       } else if err != nil {
+                               log.Print("Error reading ", blockdirpath, ": ", err)
+                               lastErr = err
+                               break
                        }
-                       // Print filenames beginning with prefix
-                       if !info.IsDir() && strings.HasPrefix(locator, prefix) {
-                               output = output + fmt.Sprintf(
-                                       "%s+%d %d\n", locator, info.Size(), info.ModTime().Unix())
+                       name := fileInfo[0].Name()
+                       if !strings.HasPrefix(name, prefix) {
+                               continue
                        }
-                       return nil
-               })
-
-       return
+                       _, err = fmt.Fprint(w,
+                               name,
+                               "+", fileInfo[0].Size(),
+                               " ", fileInfo[0].ModTime().Unix(),
+                               "\n")
+                       if err != nil {
+                               log.Print("Error writing index for ", blockdirpath, ": ", err)
+                               lastErr = err
+                               break
+                       }
+               }
+               blockdir.Close()
+       }
 }
 
 func (v *UnixVolume) Delete(loc string) error {
+       // Touch() must be called before calling Write() on a block.  Touch()
+       // also uses lockfile().  This avoids a race condition between Write()
+       // and Delete() because either (a) the file will be deleted and Touch()
+       // will signal to the caller that the file is not present (and needs to
+       // be re-written), or (b) Touch() will update the file's timestamp and
+       // Delete() will read the correct up-to-date timestamp and choose not to
+       // delete the file.
+
+       if v.readonly {
+               return MethodDisabledError
+       }
+       if v.serialize {
+               v.mutex.Lock()
+               defer v.mutex.Unlock()
+       }
        p := v.blockPath(loc)
        f, err := os.OpenFile(p, os.O_RDWR|os.O_APPEND, 0644)
        if err != nil {
@@ -266,15 +262,15 @@ func (v *UnixVolume) Delete(loc string) error {
        }
        defer unlockfile(f)
 
-       // If the block has been PUT more recently than -permission_ttl,
-       // return success without removing the block.  This guards against
-       // a race condition where a block is old enough that Data Manager
-       // has added it to the trash list, but the user submitted a PUT
-       // for the block since then.
+       // If the block has been PUT in the last blob_signature_ttl
+       // seconds, return success without removing the block. This
+       // protects data from garbage collection until it is no longer
+       // possible for clients to retrieve the unreferenced blocks
+       // anyway (because the permission signatures have expired).
        if fi, err := os.Stat(p); err != nil {
                return err
        } else {
-               if time.Since(fi.ModTime()) < permission_ttl {
+               if time.Since(fi.ModTime()) < blob_signature_ttl {
                        return nil
                }
        }
@@ -342,6 +338,10 @@ func (v *UnixVolume) String() string {
        return fmt.Sprintf("[UnixVolume %s]", v.root)
 }
 
+func (v *UnixVolume) Writable() bool {
+       return !v.readonly
+}
+
 // lockfile and unlockfile use flock(2) to manage kernel file locks.
 func lockfile(f *os.File) error {
        return syscall.Flock(int(f.Fd()), syscall.LOCK_EX)
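
The matching unlockfile falls outside this hunk; presumably it releases the same kernel lock, along these lines:

    func unlockfile(f *os.File) error {
            return syscall.Flock(int(f.Fd()), syscall.LOCK_UN)
    }
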
index 7a10fc5c60a3db5c22001f9264a77561016a6181..ebb8421d9e1d3dbf6b8fce1f3fca0953568de931 100644 (file)
@@ -5,30 +5,34 @@ import (
        "fmt"
        "io/ioutil"
        "os"
+       "regexp"
+       "sort"
+       "strings"
        "syscall"
        "testing"
        "time"
 )
 
-func TempUnixVolume(t *testing.T, serialize bool) UnixVolume {
+func TempUnixVolume(t *testing.T, serialize bool, readonly bool) *UnixVolume {
        d, err := ioutil.TempDir("", "volume_test")
        if err != nil {
                t.Fatal(err)
        }
-       return MakeUnixVolume(d, serialize)
+       return &UnixVolume{
+               root:      d,
+               serialize: serialize,
+               readonly:  readonly,
+       }
 }
 
-func _teardown(v UnixVolume) {
-       if v.queue != nil {
-               close(v.queue)
-       }
+func _teardown(v *UnixVolume) {
        os.RemoveAll(v.root)
 }
 
-// store writes a Keep block directly into a UnixVolume, for testing
-// UnixVolume methods.
-//
-func _store(t *testing.T, vol UnixVolume, filename string, block []byte) {
+// _store writes a Keep block directly into a UnixVolume, bypassing
+// the overhead and safeguards of Put(). Useful for storing bogus data
+// and isolating unit tests from Put() behavior.
+func _store(t *testing.T, vol *UnixVolume, filename string, block []byte) {
        blockdir := fmt.Sprintf("%s/%s", vol.root, filename[:3])
        if err := os.MkdirAll(blockdir, 0755); err != nil {
                t.Fatal(err)
@@ -44,7 +48,7 @@ func _store(t *testing.T, vol UnixVolume, filename string, block []byte) {
 }
 
 func TestGet(t *testing.T) {
-       v := TempUnixVolume(t, false)
+       v := TempUnixVolume(t, false, false)
        defer _teardown(v)
        _store(t, v, TEST_HASH, TEST_BLOCK)
 
@@ -58,7 +62,7 @@ func TestGet(t *testing.T) {
 }
 
 func TestGetNotFound(t *testing.T) {
-       v := TempUnixVolume(t, false)
+       v := TempUnixVolume(t, false, false)
        defer _teardown(v)
        _store(t, v, TEST_HASH, TEST_BLOCK)
 
@@ -73,8 +77,44 @@ func TestGetNotFound(t *testing.T) {
        }
 }
 
+func TestIndexTo(t *testing.T) {
+       v := TempUnixVolume(t, false, false)
+       defer _teardown(v)
+
+       _store(t, v, TEST_HASH, TEST_BLOCK)
+       _store(t, v, TEST_HASH_2, TEST_BLOCK_2)
+       _store(t, v, TEST_HASH_3, TEST_BLOCK_3)
+
+       buf := new(bytes.Buffer)
+       v.IndexTo("", buf)
+       index_rows := strings.Split(string(buf.Bytes()), "\n")
+       sort.Strings(index_rows)
+       sorted_index := strings.Join(index_rows, "\n")
+       m, err := regexp.MatchString(
+               `^\n`+TEST_HASH+`\+\d+ \d+\n`+
+                       TEST_HASH_3+`\+\d+ \d+\n`+
+                       TEST_HASH_2+`\+\d+ \d+$`,
+               sorted_index)
+       if err != nil {
+               t.Error(err)
+       } else if !m {
+               t.Errorf("Got index %q for empty prefix", sorted_index)
+       }
+
+       for _, prefix := range []string{"f", "f15", "f15ac"} {
+               buf = new(bytes.Buffer)
+               v.IndexTo(prefix, buf)
+               m, err := regexp.MatchString(`^`+TEST_HASH_2+`\+\d+ \d+\n$`, string(buf.Bytes()))
+               if err != nil {
+                       t.Error(err)
+               } else if !m {
+                       t.Errorf("Got index %q for prefix %q", string(buf.Bytes()), prefix)
+               }
+       }
+}
+
 func TestPut(t *testing.T) {
-       v := TempUnixVolume(t, false)
+       v := TempUnixVolume(t, false, false)
        defer _teardown(v)
 
        err := v.Put(TEST_HASH, TEST_BLOCK)
@@ -91,7 +131,7 @@ func TestPut(t *testing.T) {
 }
 
 func TestPutBadVolume(t *testing.T) {
-       v := TempUnixVolume(t, false)
+       v := TempUnixVolume(t, false, false)
        defer _teardown(v)
 
        os.Chmod(v.root, 000)
@@ -101,11 +141,44 @@ func TestPutBadVolume(t *testing.T) {
        }
 }
 
+func TestUnixVolumeReadonly(t *testing.T) {
+       v := TempUnixVolume(t, false, false)
+       defer _teardown(v)
+
+       // First write something before marking readonly
+       err := v.Put(TEST_HASH, TEST_BLOCK)
+       if err != nil {
+               t.Error("got err %v, expected nil", err)
+       }
+
+       v.readonly = true
+
+       _, err = v.Get(TEST_HASH)
+       if err != nil {
+               t.Error("got err %v, expected nil", err)
+       }
+
+       err = v.Put(TEST_HASH, TEST_BLOCK)
+       if err != MethodDisabledError {
+               t.Error("got err %v, expected MethodDisabledError", err)
+       }
+
+       err = v.Touch(TEST_HASH)
+       if err != MethodDisabledError {
+               t.Error("got err %v, expected MethodDisabledError", err)
+       }
+
+       err = v.Delete(TEST_HASH)
+       if err != MethodDisabledError {
+               t.Error("got err %v, expected MethodDisabledError", err)
+       }
+}
+
 // TestPutTouch
 //     Test that when applying PUT to a block that already exists,
 //     the block's modification time is updated.
 func TestPutTouch(t *testing.T) {
-       v := TempUnixVolume(t, false)
+       v := TempUnixVolume(t, false, false)
        defer _teardown(v)
 
        if err := v.Put(TEST_HASH, TEST_BLOCK); err != nil {
@@ -165,7 +238,7 @@ func TestPutTouch(t *testing.T) {
 //
 func TestGetSerialized(t *testing.T) {
        // Create a volume with I/O serialization enabled.
-       v := TempUnixVolume(t, true)
+       v := TempUnixVolume(t, true, false)
        defer _teardown(v)
 
        _store(t, v, TEST_HASH, TEST_BLOCK)
@@ -214,7 +287,7 @@ func TestGetSerialized(t *testing.T) {
 
 func TestPutSerialized(t *testing.T) {
        // Create a volume with I/O serialization enabled.
-       v := TempUnixVolume(t, true)
+       v := TempUnixVolume(t, true, false)
        defer _teardown(v)
 
        sem := make(chan int)
@@ -243,7 +316,7 @@ func TestPutSerialized(t *testing.T) {
        }(sem)
 
        // Wait for all goroutines to finish
-       for done := 0; done < 2; {
+       for done := 0; done < 3; {
                done += <-sem
        }
 
@@ -274,7 +347,7 @@ func TestPutSerialized(t *testing.T) {
 }
 
 func TestIsFull(t *testing.T) {
-       v := TempUnixVolume(t, false)
+       v := TempUnixVolume(t, false, false)
        defer _teardown(v)
 
        full_path := v.root + "/full"
@@ -292,3 +365,23 @@ func TestIsFull(t *testing.T) {
                t.Errorf("%s: should no longer be full", v)
        }
 }
+
+func TestNodeStatus(t *testing.T) {
+       v := TempUnixVolume(t, false, false)
+       defer _teardown(v)
+
+       // Get node status and make a basic sanity check.
+       volinfo := v.Status()
+       if volinfo.MountPoint != v.root {
+               t.Errorf("GetNodeStatus mount_point %s, expected %s", volinfo.MountPoint, v.root)
+       }
+       if volinfo.DeviceNum == 0 {
+               t.Errorf("uninitialized device_num in %v", volinfo)
+       }
+       if volinfo.BytesFree == 0 {
+               t.Errorf("uninitialized bytes_free in %v", volinfo)
+       }
+       if volinfo.BytesUsed == 0 {
+               t.Errorf("uninitialized bytes_used in %v", volinfo)
+       }
+}
index 9509cacd774f5acd64372b55b1841bab55e5b023..f1878ffbbc550250ab88c5ea9a4a694d12d63132 100644 (file)
@@ -85,76 +85,122 @@ package main
 import "container/list"
 
 type WorkQueue struct {
-       newlist  chan *list.List
-       NextItem chan interface{}
+       getStatus chan WorkQueueStatus
+       newlist   chan *list.List
+       // Workers get work items by reading from this channel.
+       NextItem <-chan interface{}
+       // Each worker must send struct{}{} to DoneItem exactly once
+       // for each work item received from NextItem, when it stops
+       // working on that item (regardless of whether the work was
+       // successful).
+       DoneItem chan<- struct{}
 }
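
A minimal worker honoring this contract might look like the following sketch (handle is a hypothetical work function, queue a *WorkQueue in scope):

    go func(q *WorkQueue) {
            for item := range q.NextItem {
                    handle(item)
                    q.DoneItem <- struct{}{} // exactly once per item, success or not
            }
    }(queue)
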
 
-// NewWorkQueue returns a new worklist, and launches a listener
-// goroutine that waits for work and farms it out to workers.
+type WorkQueueStatus struct {
+       InProgress int
+       Queued     int
+}
+
+// NewWorkQueue returns a new empty WorkQueue.
 //
 func NewWorkQueue() *WorkQueue {
+       nextItem := make(chan interface{})
+       reportDone := make(chan struct{})
+       newList := make(chan *list.List)
        b := WorkQueue{
-               newlist:  make(chan *list.List),
-               NextItem: make(chan interface{}),
+               getStatus: make(chan WorkQueueStatus),
+               newlist:   newList,
+               NextItem:  nextItem,
+               DoneItem:  reportDone,
        }
-       go b.listen()
+       go func() {
+               // Read new work lists from the newlist channel.
+               // Reply to "status" and "get next item" queries by
+               // sending to the getStatus and nextItem channels
+               // respectively. Return when the newlist channel
+               // closes.
+
+               todo := &list.List{}
+               status := WorkQueueStatus{}
+
+               // When we're done, close the output channel; workers will
+               // shut down next time they ask for new work.
+               defer close(nextItem)
+               defer close(b.getStatus)
+
+               // nextChan and nextVal are both nil when we have
+               // nothing to send; otherwise they are, respectively,
+               // the nextItem channel and the next work item to send
+               // to it.
+               var nextChan chan interface{}
+               var nextVal interface{}
+
+               for newList != nil || status.InProgress > 0 {
+                       select {
+                       case p, ok := <-newList:
+                               if !ok {
+                                       // Closed, stop receiving
+                                       newList = nil
+                               }
+                               todo = p
+                               if todo == nil {
+                                       todo = &list.List{}
+                               }
+                               status.Queued = todo.Len()
+                               if status.Queued == 0 {
+                                       // Stop sending work
+                                       nextChan = nil
+                                       nextVal = nil
+                               } else {
+                                       nextChan = nextItem
+                                       nextVal = todo.Front().Value
+                               }
+                       case nextChan <- nextVal:
+                               todo.Remove(todo.Front())
+                               status.InProgress++
+                               status.Queued--
+                               if status.Queued == 0 {
+                                       // Stop sending work
+                                       nextChan = nil
+                                       nextVal = nil
+                               } else {
+                                       nextVal = todo.Front().Value
+                               }
+                       case <-reportDone:
+                               status.InProgress--
+                       case b.getStatus <- status:
+                       }
+               }
+       }()
        return &b
 }
 
-// ReplaceQueue sends a new list of pull requests to the manager goroutine.
-// The manager will discard any outstanding pull list and begin
-// working on the new list.
+// ReplaceQueue abandons any work items left in the existing queue,
+// and starts giving workers items from the given list. After giving
+// it to ReplaceQueue, the caller must not read or write the given
+// list.
 //
 func (b *WorkQueue) ReplaceQueue(list *list.List) {
        b.newlist <- list
 }
 
 // Close shuts down the manager and terminates the goroutine, which
-// completes any pull request in progress and abandons any pending
-// requests.
+// abandons any pending requests, but allows any pull request already
+// in progress to continue.
+//
+// After Close, Status will return correct values, NextItem will be
+// closed, and ReplaceQueue will panic.
 //
 func (b *WorkQueue) Close() {
        close(b.newlist)
 }
 
-// listen is run in a goroutine. It reads new pull lists from its
-// input queue until the queue is closed.
-// listen takes ownership of the list that is passed to it.
-//
-// Note that the routine does not ever need to access the list
-// itself once the current_item has been initialized, so we do
-// not bother to keep a pointer to the list. Because it is a
-// doubly linked list, holding on to the current item will keep
-// it from garbage collection.
+// Status returns an up-to-date WorkQueueStatus reflecting the current
+// queue status.
 //
-func (b *WorkQueue) listen() {
-       var current_item *list.Element
-
-       // When we're done, close the output channel to shut down any
-       // workers.
-       defer close(b.NextItem)
-
-       for {
-               // If the current list is empty, wait for a new list before
-               // even checking if workers are ready.
-               if current_item == nil {
-                       if p, ok := <-b.newlist; ok {
-                               current_item = p.Front()
-                       } else {
-                               // The channel was closed; shut down.
-                               return
-                       }
-               }
-               select {
-               case p, ok := <-b.newlist:
-                       if ok {
-                               current_item = p.Front()
-                       } else {
-                               // The input channel is closed; time to shut down
-                               return
-                       }
-               case b.NextItem <- current_item.Value:
-                       current_item = current_item.Next()
-               }
-       }
+func (b *WorkQueue) Status() WorkQueueStatus {
+       // If the channel is closed, we get the zero value of
+       // WorkQueueStatus, which is an accurate description of a
+       // finished queue.
+       return <-b.getStatus
 }
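
The comments above imply a shutdown sequence like this hypothetical driver (makeSomeList stands in for any *list.List producer):

    q := NewWorkQueue()
    q.ReplaceQueue(makeSomeList()) // the queue takes ownership of the list
    // ...workers drain q.NextItem and report via q.DoneItem...
    q.Close() // pending items are abandoned; in-flight work continues
    for q.Status().InProgress > 0 {
            time.Sleep(time.Millisecond) // Status stays usable after Close
    }
    // q.NextItem is now closed and Status() returns zeroes.
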
index 144e4c252be9ba1a069d8cf862f72c4284a28479..74c67f2dd0a6c1ee69748d24c162d95b5c98b16a 100644 (file)
@@ -2,9 +2,15 @@ package main
 
 import (
        "container/list"
+       "runtime"
        "testing"
+       "time"
 )
 
+type fatalfer interface {
+       Fatalf(string, ...interface{})
+}
+
 func makeTestWorkList(ary []int) *list.List {
        l := list.New()
        for _, n := range ary {
@@ -13,40 +19,101 @@ func makeTestWorkList(ary []int) *list.List {
        return l
 }
 
-func expectChannelEmpty(t *testing.T, c <-chan interface{}) {
+func expectChannelEmpty(t fatalfer, c <-chan interface{}) {
        select {
-       case item := <-c:
-               t.Fatalf("Received value (%v) from channel that we expected to be empty", item)
+       case item, ok := <-c:
+               if ok {
+                       t.Fatalf("Received value (%+v) from channel that we expected to be empty", item)
+               }
        default:
-               // no-op
        }
 }
 
-func expectChannelNotEmpty(t *testing.T, c <-chan interface{}) {
-       if item, ok := <-c; !ok {
-               t.Fatal("expected data on a closed channel")
-       } else if item == nil {
-               t.Fatal("expected data on an empty channel")
+func expectChannelNotEmpty(t fatalfer, c <-chan interface{}) interface{} {
+       select {
+       case item, ok := <-c:
+               if !ok {
+                       t.Fatalf("expected data on a closed channel")
+               }
+               return item
+       case <-time.After(time.Second):
+               t.Fatalf("expected data on an empty channel")
+               return nil
        }
 }
 
-func expectChannelClosed(t *testing.T, c <-chan interface{}) {
-       received, ok := <-c
-       if ok {
-               t.Fatalf("Expected channel to be closed, but received %v instead", received)
+func expectChannelClosedWithin(t fatalfer, timeout time.Duration, c <-chan interface{}) {
+       select {
+       case received, ok := <-c:
+               if ok {
+                       t.Fatalf("Expected channel to be closed, but received %+v instead", received)
+               }
+       case <-time.After(timeout):
+               t.Fatalf("Expected channel to be closed, but it is still open after %v", timeout)
        }
 }
 
-func expectFromChannel(t *testing.T, c <-chan interface{}, expected []int) {
+func doWorkItems(t fatalfer, q *WorkQueue, expected []int) {
        for i := range expected {
-               actual, ok := <-c
-               t.Logf("received %v", actual)
+               actual, ok := <-q.NextItem
                if !ok {
-                       t.Fatalf("Expected %v but channel was closed after receiving the first %d elements correctly.", expected, i)
-               } else if actual.(int) != expected[i] {
-                       t.Fatalf("Expected %v but received '%v' after receiving the first %d elements correctly.", expected[i], actual, i)
+                       t.Fatalf("Expected %+v but channel was closed after receiving %+v as expected.", expected, expected[:i])
+               }
+               q.DoneItem <- struct{}{}
+               if actual.(int) != expected[i] {
+                       t.Fatalf("Expected %+v but received %+v after receiving %+v as expected.", expected[i], actual, expected[:i])
+               }
+       }
+}
+
+func expectEqualWithin(t fatalfer, timeout time.Duration, expect interface{}, f func() interface{}) {
+       ok := make(chan struct{})
+       giveup := false
+       go func() {
+               for f() != expect && !giveup {
+                       time.Sleep(time.Millisecond)
+               }
+               close(ok)
+       }()
+       select {
+       case <-ok:
+       case <-time.After(timeout):
+               giveup = true
+               _, file, line, _ := runtime.Caller(1)
+               t.Fatalf("Still getting %+v, timed out waiting for %+v\n%s:%d", f(), expect, file, line)
+       }
+}
+
+func expectQueued(t fatalfer, b *WorkQueue, expectQueued int) {
+       if l := b.Status().Queued; l != expectQueued {
+               t.Fatalf("Got Queued==%d, expected %d", l, expectQueued)
+       }
+}
+
+func TestWorkQueueDoneness(t *testing.T) {
+       b := NewWorkQueue()
+       defer b.Close()
+       b.ReplaceQueue(makeTestWorkList([]int{1, 2, 3}))
+       expectQueued(t, b, 3)
+       gate := make(chan struct{})
+       go func() {
+               <-gate
+               for _ = range b.NextItem {
+                       <-gate
+                       time.Sleep(time.Millisecond)
+                       b.DoneItem <- struct{}{}
                }
+       }()
+       expectEqualWithin(t, time.Second, 0, func() interface{} { return b.Status().InProgress })
+       b.ReplaceQueue(makeTestWorkList([]int{4, 5, 6}))
+       for i := 1; i <= 3; i++ {
+               gate <- struct{}{}
+               expectEqualWithin(t, time.Second, 3-i, func() interface{} { return b.Status().Queued })
+               expectEqualWithin(t, time.Second, 1, func() interface{} { return b.Status().InProgress })
        }
+       close(gate)
+       expectEqualWithin(t, time.Second, 0, func() interface{} { return b.Status().InProgress })
+       expectChannelEmpty(t, b.NextItem)
 }
 
 // Create a WorkQueue, generate a list for it, and instantiate a worker.
@@ -54,9 +121,12 @@ func TestWorkQueueReadWrite(t *testing.T) {
        var input = []int{1, 1, 2, 3, 5, 8, 13, 21, 34}
 
        b := NewWorkQueue()
+       expectQueued(t, b, 0)
+
        b.ReplaceQueue(makeTestWorkList(input))
+       expectQueued(t, b, len(input))
 
-       expectFromChannel(t, b.NextItem, input)
+       doWorkItems(t, b, input)
        expectChannelEmpty(t, b.NextItem)
        b.Close()
 }
@@ -66,6 +136,7 @@ func TestWorkQueueEarlyRead(t *testing.T) {
        var input = []int{1, 1, 2, 3, 5, 8, 13, 21, 34}
 
        b := NewWorkQueue()
+       defer b.Close()
 
        // First, demonstrate that nothing is available on the NextItem
        // channel.
@@ -76,8 +147,7 @@ func TestWorkQueueEarlyRead(t *testing.T) {
        //
        done := make(chan int)
        go func() {
-               expectFromChannel(t, b.NextItem, input)
-               b.Close()
+               doWorkItems(t, b, input)
                done <- 1
        }()
 
@@ -85,8 +155,29 @@ func TestWorkQueueEarlyRead(t *testing.T) {
        // finish.
        b.ReplaceQueue(makeTestWorkList(input))
        <-done
+       expectQueued(t, b, 0)
+}
 
-       expectChannelClosed(t, b.NextItem)
+// After Close(), NextItem closes, work finishes, then stats return zero.
+func TestWorkQueueClose(t *testing.T) {
+       b := NewWorkQueue()
+       input := []int{1, 2, 3, 4, 5, 6, 7, 8}
+       mark := make(chan struct{})
+       go func() {
+               <-b.NextItem
+               mark <- struct{}{}
+               <-mark
+               b.DoneItem <- struct{}{}
+       }()
+       b.ReplaceQueue(makeTestWorkList(input))
+       // Wait for worker to take item 1
+       <-mark
+       b.Close()
+       expectEqualWithin(t, time.Second, 1, func() interface{} { return b.Status().InProgress })
+       // Tell worker to report done
+       mark <- struct{}{}
+       expectEqualWithin(t, time.Second, 0, func() interface{} { return b.Status().InProgress })
+       expectChannelClosedWithin(t, time.Second, b.NextItem)
 }
 
 // Show that a reader may block when the manager's list is exhausted,
@@ -99,10 +190,11 @@ func TestWorkQueueReaderBlocks(t *testing.T) {
        )
 
        b := NewWorkQueue()
+       defer b.Close()
        sendmore := make(chan int)
        done := make(chan int)
        go func() {
-               expectFromChannel(t, b.NextItem, inputBeforeBlock)
+               doWorkItems(t, b, inputBeforeBlock)
 
                // Confirm that the channel is empty, so a subsequent read
                // on it will block.
@@ -110,8 +202,7 @@ func TestWorkQueueReaderBlocks(t *testing.T) {
 
                // Signal that we're ready for more input.
                sendmore <- 1
-               expectFromChannel(t, b.NextItem, inputAfterBlock)
-               b.Close()
+               doWorkItems(t, b, inputAfterBlock)
                done <- 1
        }()
 
@@ -136,14 +227,14 @@ func TestWorkQueueReplaceQueue(t *testing.T) {
 
        // Read just the first five elements from the work list.
        // Confirm that the channel is not empty.
-       expectFromChannel(t, b.NextItem, firstInput[0:5])
+       doWorkItems(t, b, firstInput[0:5])
        expectChannelNotEmpty(t, b.NextItem)
 
        // Replace the work list and read five more elements.
        // The old list should have been discarded and all new
        // elements come from the new list.
        b.ReplaceQueue(makeTestWorkList(replaceInput))
-       expectFromChannel(t, b.NextItem, replaceInput[0:5])
+       doWorkItems(t, b, replaceInput[0:5])
 
        b.Close()
 }
diff --git a/services/login-sync/.gitignore b/services/login-sync/.gitignore
new file mode 100644 (file)
index 0000000..cec3cb5
--- /dev/null
@@ -0,0 +1,2 @@
+*.gem
+Gemfile.lock
diff --git a/services/login-sync/Gemfile b/services/login-sync/Gemfile
new file mode 100644 (file)
index 0000000..ffeab22
--- /dev/null
@@ -0,0 +1,7 @@
+source 'https://rubygems.org'
+gemspec
+group :test, :performance do
+  gem 'minitest', '>= 5.0.0'
+  gem 'mocha', require: false
+  gem 'rake'
+end
diff --git a/services/login-sync/Rakefile b/services/login-sync/Rakefile
new file mode 100644 (file)
index 0000000..cf4652f
--- /dev/null
@@ -0,0 +1,8 @@
+require 'rake/testtask'
+
+Rake::TestTask.new do |t|
+  t.libs << 'test'
+end
+
+desc 'Run tests'
+task default: :test
diff --git a/services/login-sync/arvados-login-sync.gemspec b/services/login-sync/arvados-login-sync.gemspec
new file mode 100644 (file)
index 0000000..2f95a55
--- /dev/null
@@ -0,0 +1,24 @@
+if not File.exists?('/usr/bin/git') then
+  STDERR.puts "\nGit binary not found, aborting. Please install git and run gem build from a checked out copy of the git repository.\n\n"
+  exit
+end
+
+git_timestamp, git_hash = `git log -n1 --first-parent --format=%ct:%H .`.chomp.split(":")
+git_timestamp = Time.at(git_timestamp.to_i).utc
+
+Gem::Specification.new do |s|
+  s.name        = 'arvados-login-sync'
+  s.version     = "0.1.#{git_timestamp.strftime('%Y%m%d%H%M%S')}"
+  s.date        = git_timestamp.strftime("%Y-%m-%d")
+  s.summary     = "Set up local login accounts for Arvados users"
+  s.description = "Creates and updates local login accounts for Arvados users. Built from git commit #{git_hash}"
+  s.authors     = ["Arvados Authors"]
+  s.email       = 'gem-dev@curoverse.com'
+  s.licenses    = ['GNU Affero General Public License, version 3.0']
+  s.files       = ["bin/arvados-login-sync"]
+  s.executables << "arvados-login-sync"
+  s.required_ruby_version = '>= 2.1.0'
+  s.add_runtime_dependency 'arvados', '~> 0.1', '>= 0.1.20150615153458'
+  s.homepage    =
+    'https://arvados.org'
+end
diff --git a/services/login-sync/bin/arvados-login-sync b/services/login-sync/bin/arvados-login-sync
new file mode 100755 (executable)
index 0000000..fc11e27
--- /dev/null
@@ -0,0 +1,111 @@
+#!/usr/bin/env ruby
+
+require 'rubygems'
+require 'pp'
+require 'arvados'
+require 'etc'
+require 'fileutils'
+require 'yaml'
+
+req_envs = %w(ARVADOS_API_HOST ARVADOS_API_TOKEN ARVADOS_VIRTUAL_MACHINE_UUID)
+req_envs.each do |k|
+  unless ENV[k]
+    abort "Fatal: These environment vars must be set: #{req_envs}"
+  end
+end
+
+keys = ''
+
+seen = Hash.new
+
+begin
+  uids = Hash[Etc.to_enum(:passwd).map { |ent| [ent.name, ent.uid] }]
+  gids = Hash[Etc.to_enum(:group).map { |ent| [ent.name, ent.gid] }]
+  arv = Arvados.new({ :suppress_ssl_warnings => false })
+
+  vm_uuid = ENV['ARVADOS_VIRTUAL_MACHINE_UUID']
+
+  logins = arv.virtual_machine.logins(:uuid => vm_uuid)[:items]
+  logins = [] if logins.nil?
+  logins = logins.reject { |l| l[:username].nil? or l[:hostname].nil? or l[:public_key].nil? or l[:virtual_machine_uuid] != vm_uuid }
+
+  # No system users
+  uid_min = 1000
+  open("/etc/login.defs", encoding: "utf-8") do |login_defs|
+    login_defs.each_line do |line|
+      next unless match = /^UID_MIN\s+(\S+)$/.match(line)
+      if match[1].start_with?("0x")
+        base = 16
+      elsif match[1].start_with?("0")
+        base = 8
+      else
+        base = 10
+      end
+      new_uid_min = match[1].to_i(base)
+      uid_min = new_uid_min if (new_uid_min > 0)
+    end
+  end
+  logins.reject! { |l| (uids[l[:username]] || 65535) < uid_min }
+
+  keys = Hash.new()
+
+  # Collect all keys
+  logins.each do |l|
+    keys[l[:username]] = Array.new() if not keys.has_key?(l[:username])
+    key = l[:public_key]
+    # Handle putty-style ssh public keys
+    key.sub!(/^(Comment: "r[^\n]*\n)(.*)$/m,'ssh-rsa \2 \1')
+    key.sub!(/^(Comment: "d[^\n]*\n)(.*)$/m,'ssh-dss \2 \1')
+    key.gsub!(/\n/,'')
+    key.strip!
+
+    keys[l[:username]].push(key) if not keys[l[:username]].include?(key)
+  end
+
+  seen = Hash.new()
+  devnull = open("/dev/null", "w")
+
+  logins.each do |l|
+    next if seen[l[:username]]
+    seen[l[:username]] = true if not seen.has_key?(l[:username])
+    @homedir = "/home/#{l[:username]}"
+
+    unless uids[l[:username]]
+      STDERR.puts "Creating account #{l[:username]}"
+      groups = l[:groups] || []
+      # Adding users to the FUSE group has long been hardcoded behavior.
+      groups << "fuse"
+      groups.select! { |name| gids[name] }
+      # Create new user
+      next unless system("useradd", "-m",
+                         "-c", l[:username],
+                         "-s", "/bin/bash",
+                         "-G", groups.join(","),
+                         l[:username],
+                         out: devnull)
+    end
+    # Create .ssh directory if necessary
+    userdotssh = File.join(@homedir, ".ssh")
+    Dir.mkdir(userdotssh) if !File.exists?(userdotssh)
+    @key = "#######################################################################################
+#  THIS FILE IS MANAGED BY #{$0} -- CHANGES WILL BE OVERWRITTEN  #
+#######################################################################################\n\n"
+    @key += keys[l[:username]].join("\n") + "\n"
+    userauthkeys = File.join(userdotssh, "authorized_keys")
+    if !File.exists?(userauthkeys) or IO::read(userauthkeys) != @key then
+      f = File.new(userauthkeys, 'w')
+      f.write(@key)
+      f.close()
+    end
+    FileUtils.chown_R(l[:username], l[:username], userdotssh)
+    File.chmod(0700, userdotssh)
+    File.chmod(0750, @homedir)
+  end
+
+  devnull.close
+rescue Exception => bang
+  puts "Error: " + bang.to_s
+  puts bang.backtrace.join("\n")
+  exit 1
+end
+
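
The UID_MIN scan above hand-rolls base detection ("0x" means hex, a leading "0" means octal, otherwise decimal). For comparison, in Go (the language used elsewhere in this commit) the same rule comes for free from strconv.ParseInt with base 0; a standalone sketch, not part of this commit:

    package main

    import (
            "bufio"
            "fmt"
            "os"
            "regexp"
            "strconv"
    )

    func main() {
            uidMin := int64(1000) // fallback, matching the script's default
            re := regexp.MustCompile(`^UID_MIN\s+(\S+)$`)
            if f, err := os.Open("/etc/login.defs"); err == nil {
                    defer f.Close()
                    scanner := bufio.NewScanner(f)
                    for scanner.Scan() {
                            m := re.FindStringSubmatch(scanner.Text())
                            if m == nil {
                                    continue
                            }
                            // base 0: "0x..." is hex, "0..." is octal, else decimal
                            if n, err := strconv.ParseInt(m[1], 0, 64); err == nil && n > 0 {
                                    uidMin = n
                            }
                    }
            }
            fmt.Println("UID_MIN =", uidMin)
    }
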
diff --git a/services/login-sync/test/binstub_new_user/useradd b/services/login-sync/test/binstub_new_user/useradd
new file mode 100755 (executable)
index 0000000..173bc1d
--- /dev/null
@@ -0,0 +1,9 @@
+#!/bin/bash
+
+stub="${0##*/}"
+
+# Record what actually happened in the "spy" file
+echo "$stub $*" >> "$ARVADOS_LOGIN_SYNC_TMPDIR/spy"
+
+# Exit 0 if this command was listed in the "succeed" file
+exec fgrep -qx -- "$stub $*" "$ARVADOS_LOGIN_SYNC_TMPDIR/succeed"
diff --git a/services/login-sync/test/stubs.rb b/services/login-sync/test/stubs.rb
new file mode 100644 (file)
index 0000000..62d952f
--- /dev/null
@@ -0,0 +1,52 @@
+require 'etc'
+require 'mocha/mini_test'
+require 'ostruct'
+
+module Stubs
+  # These Etc mocks help only when we run arvados-login-sync in-process.
+
+  def setup
+    super
+    ENV['ARVADOS_VIRTUAL_MACHINE_UUID'] = 'testvm2.shell'
+    Etc.stubs(:to_enum).with(:passwd).returns stubpasswd.map { |x| OpenStruct.new x }
+    Etc.stubs(:to_enum).with(:group).returns stubgroup.map { |x| OpenStruct.new x }
+  end
+
+  def stubpasswd
+    [{name: 'root', uid: 0}]
+  end
+
+  def stubgroup
+    [{name: 'root', gid: 0}]
+  end
+
+  # These child-ENV tricks help only when we run arvados-login-sync as a subprocess.
+
+  def setup
+    super
+    @env_was = Hash[ENV]
+    @tmpdir = Dir.mktmpdir
+  end
+
+  def teardown
+    FileUtils.remove_dir(@tmpdir)
+    ENV.select! { |k| @env_was.has_key? k }
+    @env_was.each do |k,v| ENV[k]=v end
+    super
+  end
+
+  def stubenv opts={}
+    # Use UUID of testvm2.shell fixture, unless otherwise specified by test case.
+    Hash[ENV].merge('ARVADOS_VIRTUAL_MACHINE_UUID' => 'zzzzz-2x53u-382brsig8rp3065',
+                    'ARVADOS_LOGIN_SYNC_TMPDIR' => @tmpdir)
+  end
+
+  def invoke_sync opts={}
+    env = stubenv.merge(opts[:env] || {})
+    (opts[:binstubs] || []).each do |binstub|
+      env['PATH'] = File.absolute_path('../binstub_'+binstub, __FILE__) + ':' + env['PATH']
+    end
+    login_sync_path = File.absolute_path '../../bin/arvados-login-sync', __FILE__
+    system env, login_sync_path
+  end
+end
diff --git a/services/login-sync/test/test_add_user.rb b/services/login-sync/test/test_add_user.rb
new file mode 100644 (file)
index 0000000..7a010c2
--- /dev/null
@@ -0,0 +1,37 @@
+require 'minitest/autorun'
+
+require 'stubs'
+
+class TestAddUser < Minitest::Test
+  include Stubs
+
+  def test_useradd_error
+    # binstub_new_user/useradd will exit non-zero because its args
+    # won't match any line in this empty file:
+    File.open(@tmpdir+'/succeed', 'w') do |f| end
+    invoke_sync binstubs: ['new_user']
+    spied = File.read(@tmpdir+'/spy')
+    assert_match %r{useradd -m -c active -s /bin/bash -G fuse active}, spied
+    # BUG(TC): This assertion succeeds only if docker and fuse groups
+    # exist on the host, but is insensitive to the admin group (groups
+    # are quietly ignored by login-sync if they don't exist on the
+    # current host).
+    assert_match %r{useradd -m -c adminroot -s /bin/bash -G docker(,admin)?,fuse adminroot}, spied
+  end
+
+  def test_useradd_success
+    # binstub_new_user/useradd will succeed.
+    File.open(@tmpdir+'/succeed', 'w') do |f|
+      f.puts 'useradd -m -c active -s /bin/bash -G fuse active'
+      # Accept either form; see note about groups in test_useradd_error.
+      f.puts 'useradd -m -c adminroot -s /bin/bash -G docker,fuse adminroot'
+      f.puts 'useradd -m -c adminroot -s /bin/bash -G docker,admin,fuse adminroot'
+    end
+    $stderr.puts "*** Expect crash in dir_s_mkdir:"
+    invoke_sync binstubs: ['new_user']
+    assert !$?.success?
+    spied = File.read(@tmpdir+'/spy')
+    # Expect a crash after adding one user, because Dir.mkdir({home}) fails.
+    assert_match %r{^useradd -m -c [^\n]+\n$}s, spied
+  end
+end
index 70817627dfe8d3194435a7a31f1df8e330e37ed5..6d5c223fac15d6e25a95f44446eb88f4b54a6f42 100644 (file)
@@ -20,12 +20,13 @@ class ComputeNodeStateChangeBase(config.actor_class):
     This base class takes care of retrying changes and notifying
     subscribers when the change is finished.
     """
-    def __init__(self, logger_name, cloud_client, timer_actor,
+    def __init__(self, logger_name, cloud_client, arvados_client, timer_actor,
                  retry_wait, max_retry_wait):
         super(ComputeNodeStateChangeBase, self).__init__()
         self._later = self.actor_ref.proxy()
         self._logger = logging.getLogger(logger_name)
         self._cloud = cloud_client
+        self._arvados = arvados_client
         self._timer = timer_actor
         self.min_retry_wait = retry_wait
         self.max_retry_wait = max_retry_wait
@@ -79,6 +80,18 @@ class ComputeNodeStateChangeBase(config.actor_class):
         else:
             self.subscribers.add(subscriber)
 
+    def _clean_arvados_node(self, arvados_node, explanation):
+        return self._arvados.nodes().update(
+            uuid=arvados_node['uuid'],
+            body={'hostname': None,
+                  'ip_address': None,
+                  'slot_number': None,
+                  'first_ping_at': None,
+                  'last_ping_at': None,
+                  'info': {'ec2_instance_id': None,
+                           'last_action': explanation}},
+            ).execute()
+
 
 class ComputeNodeSetupActor(ComputeNodeStateChangeBase):
     """Actor to create and set up a cloud compute node.
@@ -93,9 +106,8 @@ class ComputeNodeSetupActor(ComputeNodeStateChangeBase):
                  cloud_size, arvados_node=None,
                  retry_wait=1, max_retry_wait=180):
         super(ComputeNodeSetupActor, self).__init__(
-            'arvnodeman.nodeup', cloud_client, timer_actor,
+            'arvnodeman.nodeup', cloud_client, arvados_client, timer_actor,
             retry_wait, max_retry_wait)
-        self._arvados = arvados_client
         self.cloud_size = cloud_size
         self.arvados_node = None
         self.cloud_node = None
@@ -104,23 +116,15 @@ class ComputeNodeSetupActor(ComputeNodeStateChangeBase):
         else:
             self._later.prepare_arvados_node(arvados_node)
 
-    @ComputeNodeStateChangeBase._retry()
+    @ComputeNodeStateChangeBase._retry(config.ARVADOS_ERRORS)
     def create_arvados_node(self):
         self.arvados_node = self._arvados.nodes().create(body={}).execute()
         self._later.create_cloud_node()
 
-    @ComputeNodeStateChangeBase._retry()
+    @ComputeNodeStateChangeBase._retry(config.ARVADOS_ERRORS)
     def prepare_arvados_node(self, node):
-        self.arvados_node = self._arvados.nodes().update(
-            uuid=node['uuid'],
-            body={'hostname': None,
-                  'ip_address': None,
-                  'slot_number': None,
-                  'first_ping_at': None,
-                  'last_ping_at': None,
-                  'info': {'ec2_instance_id': None,
-                           'last_action': "Prepared by Node Manager"}}
-            ).execute()
+        self.arvados_node = self._clean_arvados_node(
+            node, "Prepared by Node Manager")
         self._later.create_cloud_node()
 
     @ComputeNodeStateChangeBase._retry()
@@ -139,8 +143,10 @@ class ComputeNodeSetupActor(ComputeNodeStateChangeBase):
         self._finished()
 
     def stop_if_no_cloud_node(self):
-        if self.cloud_node is None:
-            self.stop()
+        if self.cloud_node is not None:
+            return False
+        self.stop()
+        return True
 
 
 class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
@@ -148,7 +154,7 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
 
     This actor simply destroys a cloud node, retrying as needed.
     """
-    def __init__(self, timer_actor, cloud_client, node_monitor,
+    def __init__(self, timer_actor, cloud_client, arvados_client, node_monitor,
                  cancellable=True, retry_wait=1, max_retry_wait=180):
         # If a ShutdownActor is cancellable, it will ask the
         # ComputeNodeMonitorActor if it's still eligible before taking each
@@ -156,7 +162,7 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
         # eligible.  Normal shutdowns based on job demand should be
         # cancellable; shutdowns based on node misbehavior should not.
         super(ComputeNodeShutdownActor, self).__init__(
-            'arvnodeman.nodedown', cloud_client, timer_actor,
+            'arvnodeman.nodedown', cloud_client, arvados_client, timer_actor,
             retry_wait, max_retry_wait)
         self._monitor = node_monitor.proxy()
         self.cloud_node = self._monitor.cloud_node.get()
@@ -166,9 +172,16 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
     def on_start(self):
         self._later.shutdown_node()
 
+    def _arvados_node(self):
+        return self._monitor.arvados_node.get()
+
+    def _finished(self, success_flag=None):
+        if success_flag is not None:
+            self.success = success_flag
+        return super(ComputeNodeShutdownActor, self)._finished()
+
     def cancel_shutdown(self):
-        self.success = False
-        self._finished()
+        self._finished(success_flag=False)
 
     def _stop_if_window_closed(orig_func):
         @functools.wraps(orig_func)
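The hunk's trailing context cuts off the body of _stop_if_window_closed. Based on the cancellable-shutdown comment earlier in this class, its shape is roughly the sketch below; the exact eligibility check and control flow are assumptions, not quoted source.

    def _stop_if_window_closed(orig_func):
        @functools.wraps(orig_func)
        def stop_wrapper(self, *args, **kwargs):
            # Cancellable shutdowns re-check eligibility with the
            # monitor actor before each step; if the shutdown window
            # has closed, cancel instead of proceeding.
            if (self.cancellable and
                  not self._monitor.shutdown_eligible().get()):
                self._later.cancel_shutdown()
                return None
            return orig_func(self, *args, **kwargs)
        return stop_wrapper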
@@ -187,13 +200,20 @@ class ComputeNodeShutdownActor(ComputeNodeStateChangeBase):
     @_stop_if_window_closed
     @ComputeNodeStateChangeBase._retry()
     def shutdown_node(self):
-        if self._cloud.destroy_node(self.cloud_node):
-            self._logger.info("Cloud node %s shut down.", self.cloud_node.id)
-            self.success = True
-            self._finished()
-        else:
+        if not self._cloud.destroy_node(self.cloud_node):
             # Force a retry.
             raise cloud_types.LibcloudError("destroy_node failed")
+        self._logger.info("Cloud node %s shut down.", self.cloud_node.id)
+        arv_node = self._arvados_node()
+        if arv_node is None:
+            self._finished(success_flag=True)
+        else:
+            self._later.clean_arvados_node(arv_node)
+
+    @ComputeNodeStateChangeBase._retry(config.ARVADOS_ERRORS)
+    def clean_arvados_node(self, arvados_node):
+        self._clean_arvados_node(arvados_node, "Shut down by Node Manager")
+        self._finished(success_flag=True)
 
     # Make the decorator available to subclasses.
     _stop_if_window_closed = staticmethod(_stop_if_window_closed)
index 6eaa8b937b979939c584f9b31927442aa4461d18..71e73f17f64ffbfc16f5b6485622b23fd764cdbf 100644 (file)
@@ -13,7 +13,7 @@ class ComputeNodeShutdownActor(ShutdownActorBase):
     SLURM_END_STATES = frozenset(['down\n', 'down*\n', 'drain\n', 'fail\n'])
 
     def on_start(self):
-        arv_node = self._monitor.arvados_node.get()
+        arv_node = self._arvados_node()
         if arv_node is None:
             return super(ComputeNodeShutdownActor, self).on_start()
         else:
index b703e0d14456706fbadcb71e74f51a8d5aa7688d..724c772733ae0ed1479e7b31d24238a87b591b1f 100644 (file)
@@ -2,30 +2,50 @@
 
 from __future__ import absolute_import, print_function
 
+from operator import attrgetter
+
 import libcloud.common.types as cloud_types
-from libcloud.compute.base import NodeDriver
+from libcloud.compute.base import NodeDriver, NodeAuthSSHKey
 
 from ...config import NETWORK_ERRORS
 
 class BaseComputeNodeDriver(object):
     """Abstract base class for compute node drivers.
 
-    libcloud abstracts away many of the differences between cloud providers,
-    but managing compute nodes requires some cloud-specific features (e.g.,
-    on EC2 we use tags to identify compute nodes).  Compute node drivers
-    are responsible for translating the node manager's cloud requests to a
-    specific cloud's vocabulary.
+    libcloud drivers abstract away many of the differences between
+    cloud providers, but managing compute nodes requires some
+    cloud-specific features (e.g., keeping track of node FQDNs and
+    boot times).  Compute node drivers are responsible for translating
+    the node manager's cloud requests to a specific cloud's
+    vocabulary.
 
-    Subclasses must implement arvados_create_kwargs (to update node
-    creation kwargs with information about the specific Arvados node
-    record), sync_node, and node_start_time.
+    Subclasses must implement arvados_create_kwargs, sync_node,
+    node_fqdn, and node_start_time.
     """
     CLOUD_ERRORS = NETWORK_ERRORS + (cloud_types.LibcloudError,)
 
     def __init__(self, auth_kwargs, list_kwargs, create_kwargs, driver_class):
+        """Base initializer for compute node drivers.
+
+        Arguments:
+        * auth_kwargs: A dictionary of arguments that are passed into the
+          driver_class constructor to instantiate a libcloud driver.
+        * list_kwargs: A dictionary of arguments that are passed to the
+          libcloud driver's list_nodes method to return the list of compute
+          nodes.
+        * create_kwargs: A dictionary of arguments that are passed to the
+          libcloud driver's create_node method to create a new compute node.
+        * driver_class: The class of a libcloud driver to use.
+        """
         self.real = driver_class(**auth_kwargs)
         self.list_kwargs = list_kwargs
         self.create_kwargs = create_kwargs
+        # Transform entries in create_kwargs.  For each key K, if this class
+        # has an _init_K method, remove the entry and call _init_K with the
+        # corresponding value.  If _init_K returns None, the entry stays out
+        # of the dictionary (we expect we're holding the value somewhere
+        # else, like an instance variable).  Otherwise, _init_K returns a
+        # (key, value) pair, and we add that entry to create_kwargs.
         for key in self.create_kwargs.keys():
             init_method = getattr(self, '_init_' + key, None)
             if init_method is not None:
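To illustrate the transformation described in the comment above, here is a sketch using the _init_ssh_key hook added below (the subclass name and argument values are hypothetical):

    driver = SomeComputeNodeDriver(       # hypothetical subclass
        auth_kwargs={'key': 'k', 'secret': 's'},
        list_kwargs={},
        create_kwargs={'ssh_key': '/home/user/.ssh/id_rsa.pub'})
    # __init__ finds _init_ssh_key, removes the 'ssh_key' entry, and
    # stores the returned ('auth', NodeAuthSSHKey(...)) pair instead:
    driver.create_kwargs   # => {'auth': <NodeAuthSSHKey>}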
@@ -36,10 +56,28 @@ class BaseComputeNodeDriver(object):
     def _init_ping_host(self, ping_host):
         self.ping_host = ping_host
 
-    def search_for(self, term, list_method, key=lambda item: item.id):
+    def _init_ssh_key(self, filename):
+        with open(filename) as ssh_file:
+            key = NodeAuthSSHKey(ssh_file.read())
+        return 'auth', key
+
+    def search_for(self, term, list_method, key=attrgetter('id'), **kwargs):
+        """Return one matching item from a list of cloud objects.
+
+        Raises ValueError if the number of matching objects is not exactly 1.
+
+        Arguments:
+        * term: The value that identifies a matching item.
+        * list_method: A string that names the method to call on this
+          instance's libcloud driver for a list of objects.
+        * key: A function that accepts a cloud object and returns the
+          value to compare against `term`.  Returns the object's 'id'
+          attribute by default.
+        * Any additional keyword arguments are passed through to the
+          list method.
+        """
         cache_key = (list_method, term)
         if cache_key not in self.SEARCH_CACHE:
-            results = [item for item in getattr(self.real, list_method)()
+            items = getattr(self.real, list_method)(**kwargs)
+            results = [item for item in items
                        if key(item) == term]
             count = len(results)
             if count != 1:
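Two representative calls, mirrored from the EC2 and GCE drivers elsewhere in this diff (attrgetter('name') is equivalent to GCE's _name_key helper):

    # Match on the default 'id' attribute:
    subnet = self.search_for(subnet_id, 'ex_list_subnets')
    # Match on a different attribute by supplying a key function:
    image = self.search_for(image_name, 'list_images', attrgetter('name'))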
@@ -52,6 +90,17 @@ class BaseComputeNodeDriver(object):
         return self.real.list_nodes(**self.list_kwargs)
 
     def arvados_create_kwargs(self, arvados_node):
+        """Return dynamic keyword arguments for create_node.
+
+        Subclasses must override this method.  It should return a dictionary
+        of keyword arguments to pass to the libcloud driver's create_node
+        method.  These arguments will extend the static arguments in
+        create_kwargs.
+
+        Arguments:
+        * arvados_node: The Arvados node record that will be associated
+          with this cloud node, as returned from the API server.
+        """
         raise NotImplementedError("BaseComputeNodeDriver.arvados_create_kwargs")
 
     def _make_ping_url(self, arvados_node):
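For a concrete override, the EC2 driver's implementation (unchanged elsewhere in this diff) is the simplest example:

    def arvados_create_kwargs(self, arvados_node):
        # Name the cloud node after the Arvados node, and pass the ping
        # URL as user data so the booted node can register itself.
        return {'name': arvados_node_fqdn(arvados_node),
                'ex_userdata': self._make_ping_url(arvados_node)}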
@@ -86,6 +135,8 @@ class BaseComputeNodeDriver(object):
 
     @classmethod
     def node_start_time(cls, node):
+        # This method should return the time the node was started, in
+        # seconds since the Unix epoch (UTC).
         raise NotImplementedError("BaseComputeNodeDriver.node_start_time")
 
     @classmethod
@@ -94,7 +145,7 @@ class BaseComputeNodeDriver(object):
         # represent API errors.  Return True for any exception that is
         # exactly an Exception, or a better-known higher-level exception.
         return (isinstance(exception, cls.CLOUD_ERRORS) or
-                getattr(exception, '__class__', None) is Exception)
+                type(exception) is Exception)
 
     # Now that we've defined all our own methods, delegate generic, public
     # attributes of libcloud drivers that we haven't defined ourselves.
diff --git a/services/nodemanager/arvnodeman/computenode/driver/azure.py b/services/nodemanager/arvnodeman/computenode/driver/azure.py
new file mode 100644 (file)
index 0000000..014b92c
--- /dev/null
@@ -0,0 +1,71 @@
+#!/usr/bin/env python
+
+from __future__ import absolute_import, print_function
+
+import time
+
+import libcloud.compute.base as cloud_base
+import libcloud.compute.providers as cloud_provider
+import libcloud.compute.types as cloud_types
+
+from . import BaseComputeNodeDriver
+from .. import arvados_node_fqdn, arvados_timestamp, ARVADOS_TIMEFMT
+
+class ComputeNodeDriver(BaseComputeNodeDriver):
+
+    DEFAULT_DRIVER = cloud_provider.get_driver(cloud_types.Provider.AZURE_ARM)
+    SEARCH_CACHE = {}
+
+    def __init__(self, auth_kwargs, list_kwargs, create_kwargs,
+                 driver_class=DEFAULT_DRIVER):
+
+        if not list_kwargs.get("ex_resource_group"):
+            raise Exception("Must include ex_resource_group in Cloud List configuration (list_kwargs)")
+
+        create_kwargs["ex_resource_group"] = list_kwargs["ex_resource_group"]
+
+        self.tags = {key[4:]: value
+                     for key, value in create_kwargs.iteritems()
+                     if key.startswith('tag_')}
+        # filter out tags from create_kwargs
+        create_kwargs = {key: value
+                         for key, value in create_kwargs.iteritems()
+                         if not key.startswith('tag_')}
+        super(ComputeNodeDriver, self).__init__(
+            auth_kwargs, list_kwargs, create_kwargs,
+            driver_class)
+
+    def arvados_create_kwargs(self, arvados_node):
+        cluster_id, _, node_id = arvados_node['uuid'].split('-')
+        name = 'compute-{}-{}'.format(node_id, cluster_id)
+        tags = {
+            'booted_at': time.strftime(ARVADOS_TIMEFMT, time.gmtime()),
+            'arv-ping-url': self._make_ping_url(arvados_node)
+        }
+        tags.update(self.tags)
+        return {
+            'name': name,
+            'ex_tags': tags,
+        }
+
+    def sync_node(self, cloud_node, arvados_node):
+        self.real.ex_create_tags(cloud_node,
+                                 {'hostname': arvados_node_fqdn(arvados_node)})
+
+    def _init_image(self, urn):
+        return "image", self.get_image(urn)
+
+    def list_nodes(self):
+        # Azure only supports filtering node lists by resource group.
+        # Do our own filtering based on tag.
+        return [node for node in
+                super(ComputeNodeDriver, self).list_nodes()
+                if node.extra["tags"].get("arvados-class") == self.tags["arvados-class"]]
+
+    @classmethod
+    def node_fqdn(cls, node):
+        return node.extra["tags"].get("hostname")
+
+    @classmethod
+    def node_start_time(cls, node):
+        return arvados_timestamp(node.extra["tags"].get("booted_at"))
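To trace the tag_ prefix convention end to end, assume the example configuration later in this diff sets tag_arvados-class = dynamic-compute and tag_cluster = zyxwv under [Cloud Create]:

    # create_kwargs arrives from the config loader roughly as:
    #   {'image': ..., 'ssh_key': ...,
    #    'tag_arvados-class': 'dynamic-compute', 'tag_cluster': 'zyxwv'}
    # __init__ strips the tag_ entries into driver.tags:
    driver.tags  # => {'arvados-class': 'dynamic-compute', 'cluster': 'zyxwv'}
    # list_nodes() then keeps only nodes whose 'arvados-class' tag
    # matches, since Azure itself can only filter by resource group.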
index 588ca515a9cf3f55abe0b4091928a4c153e41541..6afe3163c9cf89bb287ca3c0a904311345a36555 100644 (file)
@@ -64,11 +64,6 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
     def _init_subnet_id(self, subnet_id):
         return 'ex_subnet', self.search_for(subnet_id, 'ex_list_subnets')
 
-    def _init_ssh_key(self, filename):
-        with open(filename) as ssh_file:
-            key = cloud_base.NodeAuthSSHKey(ssh_file.read())
-        return 'auth', key
-
     def arvados_create_kwargs(self, arvados_node):
         return {'name': arvados_node_fqdn(arvados_node),
                 'ex_userdata': self._make_ping_url(arvados_node)}
index 6380d0e342780f418ef3bfa878b0c79c24268377..36bfc96213b9888df3a9afb7bd9c37fd6b76d4e1 100644 (file)
@@ -34,11 +34,17 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
         super(ComputeNodeDriver, self).__init__(
             auth_kwargs, list_kwargs, create_kwargs,
             driver_class)
+        self._disktype_links = {dt.name: self._object_link(dt)
+                                for dt in self.real.ex_list_disktypes()}
 
     @staticmethod
     def _name_key(cloud_object):
         return cloud_object.name
 
+    @staticmethod
+    def _object_link(cloud_object):
+        return cloud_object.extra.get('selfLink')
+
     def _init_image(self, image_name):
         return 'image', self.search_for(
             image_name, 'list_images', self._name_key)
@@ -59,14 +65,39 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
 
     def arvados_create_kwargs(self, arvados_node):
         cluster_id, _, node_id = arvados_node['uuid'].split('-')
-        result = {'name': 'compute-{}-{}'.format(node_id, cluster_id),
+        name = 'compute-{}-{}'.format(node_id, cluster_id)
+        disks = [
+            {'autoDelete': True,
+             'boot': True,
+             'deviceName': name,
+             'initializeParams':
+                 {'diskName': name,
+                  'diskType': self._disktype_links['pd-standard'],
+                  'sourceImage': self._object_link(self.create_kwargs['image']),
+                  },
+             'type': 'PERSISTENT',
+             },
+            {'autoDelete': True,
+             'boot': False,
+             # Boot images rely on this device name to find the SSD.
+             # Any change must be coordinated in the image.
+             'deviceName': 'tmp',
+             'initializeParams':
+                 {'diskType': self._disktype_links['local-ssd'],
+                  },
+             'type': 'SCRATCH',
+             },
+            ]
+        result = {'name': name,
                   'ex_metadata': self.create_kwargs['ex_metadata'].copy(),
-                  'ex_tags': list(self.node_tags)}
-        result['ex_metadata']['arv-ping-url'] = self._make_ping_url(
-            arvados_node)
-        result['ex_metadata']['booted_at'] = time.strftime(ARVADOS_TIMEFMT,
-                                                           time.gmtime())
-        result['ex_metadata']['hostname'] = arvados_node_fqdn(arvados_node)
+                  'ex_tags': list(self.node_tags),
+                  'ex_disks_gce_struct': disks,
+                  }
+        result['ex_metadata'].update({
+                'arv-ping-url': self._make_ping_url(arvados_node),
+                'booted_at': time.strftime(ARVADOS_TIMEFMT, time.gmtime()),
+                'hostname': arvados_node_fqdn(arvados_node),
+                })
         return result
 
     def list_nodes(self):
index 836b673e9267818d37bd6bd750465a8e33a2b2b4..44f15132643f558e3ed3591935ac1f517de589d5 100644 (file)
@@ -202,10 +202,10 @@ class NodeManagerDaemonActor(actor_class):
                    [self.cloud_nodes, self.booted, self.booting])
 
     def _nodes_busy(self):
-        return sum(1 for idle in
-                   pykka.get_all(rec.actor.in_state('idle') for rec in
+        return sum(1 for busy in
+                   pykka.get_all(rec.actor.in_state('busy') for rec in
                                  self.cloud_nodes.nodes.itervalues())
-                   if idle is False)
+                   if busy)
 
     def _nodes_wanted(self):
         up_count = self._nodes_up()
@@ -299,8 +299,7 @@ class NodeManagerDaemonActor(actor_class):
         if (nodes_excess < 1) or not self.booting:
             return None
         for key, node in self.booting.iteritems():
-            node.stop_if_no_cloud_node().get()
-            if not node.actor_ref.is_alive():
+            if node.stop_if_no_cloud_node().get():
                 del self.booting[key]
                 if nodes_excess > 1:
                     self._later.stop_booting_node()
@@ -312,6 +311,7 @@ class NodeManagerDaemonActor(actor_class):
             return None
         shutdown = self._node_shutdown.start(
             timer_actor=self._timer, cloud_client=self._new_cloud(),
+            arvados_client=self._new_arvados(),
             node_monitor=node_actor.actor_ref, cancellable=cancellable).proxy()
         self.shutdowns[cloud_node_id] = shutdown
         shutdown.subscribe(self._later.node_finished_shutdown)
@@ -328,7 +328,7 @@ class NodeManagerDaemonActor(actor_class):
                 break
         else:
             return None
-        if record.arvados_node is None:
+        if not record.actor.in_state('idle', 'busy').get():
             self._begin_node_shutdown(record.actor, cancellable=False)
 
     def node_finished_shutdown(self, shutdown_actor):
@@ -345,12 +345,14 @@ class NodeManagerDaemonActor(actor_class):
     def shutdown(self):
         self._logger.info("Shutting down after signal.")
         self.poll_stale_after = -1  # Inhibit starting/stopping nodes
-        for bootnode in self.booting.itervalues():
-            bootnode.stop_if_no_cloud_node()
+        setup_stops = {key: node.stop_if_no_cloud_node()
+                       for key, node in self.booting.iteritems()}
+        self.booting = {key: self.booting[key]
+                        for key in setup_stops if not setup_stops[key].get()}
         self._later.await_shutdown()
 
     def await_shutdown(self):
-        if any(node.actor_ref.is_alive() for node in self.booting.itervalues()):
+        if self.booting:
             self._timer.schedule(time.time() + 1, self._later.await_shutdown)
         else:
             self.stop()
old mode 100644 (file)
new mode 100755 (executable)
diff --git a/services/nodemanager/doc/azure.example.cfg b/services/nodemanager/doc/azure.example.cfg
new file mode 100644 (file)
index 0000000..ceeff5a
--- /dev/null
@@ -0,0 +1,152 @@
+# Azure configuration for Arvados Node Manager.
+# All times are in seconds unless specified otherwise.
+
+[Daemon]
+# The dispatcher can customize the start and stop procedure for
+# cloud nodes.  For example, the SLURM dispatcher drains nodes
+# through SLURM before shutting them down.
+#dispatcher = slurm
+
+# Node Manager will ensure that there are at least this many nodes
+# running at all times.
+min_nodes = 0
+
+# Node Manager will not start any compute nodes when at least this
+# many are running.
+max_nodes = 8
+
+# Poll Azure nodes and Arvados for new information every N seconds.
+poll_time = 60
+
+# Polls have exponential backoff when services fail to respond.
+# This is the longest time to wait between polls.
+max_poll_time = 300
+
+# If Node Manager can't successfully poll a service for this long,
+# it will never start or stop compute nodes, on the assumption that its
+# information is too outdated.
+poll_stale_after = 600
+
+# If Node Manager boots a cloud node, and it does not pair with an Arvados
+# node before this long, assume that there was a cloud bootstrap failure and
+# shut it down.  Note that normal shutdown windows apply (see the Cloud
+# section), so this should be shorter than the first shutdown window value.
+boot_fail_after = 1800
+
+# "Node stale time" affects two related behaviors.
+# 1. If a compute node has been running for at least this long, but it
+# isn't paired with an Arvados node, leave it alone instead of shutting it down.
+# This prevents the node manager from shutting down a node that might
+# actually be doing work, but is having temporary trouble contacting the
+# API server.
+# 2. When the Node Manager starts a new compute node, it will try to reuse
+# an Arvados node that hasn't been updated for this long.
+node_stale_after = 14400
+
+# File path for Certificate Authorities
+certs_file = /etc/ssl/certs/ca-certificates.crt
+
+[Logging]
+# Log file path
+file = /var/log/arvados/node-manager.log
+
+# Log level for most Node Manager messages.
+# Choose one of DEBUG, INFO, WARNING, ERROR, or CRITICAL.
+# WARNING lets you know when polling a service fails.
+# INFO additionally lets you know when a compute node is started or stopped.
+level = INFO
+
+# You can also set different log levels for specific libraries.
+# Pykka is the Node Manager's actor library.
+# Setting this to DEBUG will display tracebacks for uncaught
+# exceptions in the actors, but it's also very chatty.
+pykka = WARNING
+
+# Setting apiclient to INFO will log the URL of every Arvados API request.
+apiclient = WARNING
+
+[Arvados]
+host = zyxwv.arvadosapi.com
+token = ARVADOS_TOKEN
+timeout = 15
+
+# Accept an untrusted SSL certificate from the API server?
+insecure = no
+
+[Cloud]
+provider = azure
+
+# Shutdown windows define periods of time when a node may and may not be shut
+# down.  These are windows in full minutes, separated by commas.  Counting from
+# the time the node is booted, the node WILL NOT shut down for N1 minutes; then
+# it MAY shut down for N2 minutes; then it WILL NOT shut down for N3 minutes;
+# and so on.  For example, "20, 999999" means the node may shut down between
+# the 20th and 999999th minutes of uptime.
+# Azure bills by the minute, so it makes sense to aggressively shut down idle
+# nodes.  Specify at least two windows.  You can add as many as you need beyond
+# that.
+shutdown_windows = 20, 999999
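A minimal Python sketch of the window arithmetic described above (illustrative only, not the Node Manager implementation):

    def shutdown_allowed(uptime_minutes, windows=(20, 999999)):
        # Windows alternate: closed for windows[0] minutes, then open
        # for windows[1], closed for windows[2], and so on.
        boundary, allowed = 0, False
        for window in windows:
            boundary += window
            if uptime_minutes < boundary:
                return allowed
            allowed = not allowed
        return allowed  # past the last window; sketch stops here

    shutdown_allowed(10)   # => False (inside the first 20 minutes)
    shutdown_allowed(45)   # => True  (inside the long open window)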
+
+[Cloud Credentials]
+# Use "azure account list" with the azure CLI to get these values.
+tenant_id = 00000000-0000-0000-0000-000000000000
+subscription_id = 00000000-0000-0000-0000-000000000000
+
+# The following directions are based on
+# https://azure.microsoft.com/en-us/documentation/articles/resource-group-authenticate-service-principal/
+#
+# azure ad app create --name "<Your Application Display Name>" --home-page "<https://YourApplicationHomePage>" --identifier-uris "<https://YouApplicationUri>" --password <Your_Password>
+# azure ad sp create "<Application_Id>"
+# azure role assignment create --objectId "<Object_Id>" -o Owner -c /subscriptions/{subscriptionId}/
+#
+# Use <Application_Id> for "key" and <Your_Password> for "secret".
+#
+key = 00000000-0000-0000-0000-000000000000
+secret = PASSWORD
+timeout = 60
+region = East US
+
+[Cloud List]
+# The resource group in which the compute node virtual machines will be created
+# and listed.
+ex_resource_group = ArvadosResourceGroup
+
+[Cloud Create]
+# The image id, in the form "Publisher:Offer:SKU:Version"
+image = Canonical:UbuntuServer:14.04.3-LTS:14.04.201508050
+
+# Path to a local ssh key file that will be used to provision new nodes.
+ssh_key = /home/arvadosuser/.ssh/id_rsa.pub
+
+# The account name for the admin user that will be provisioned on new nodes.
+ex_user_name = arvadosuser
+
+# The Azure storage account that will be used to store the node OS disk images.
+ex_storage_account = arvadosstorage
+
+# The virtual network the VMs will be associated with.
+ex_network = ArvadosNetwork
+
+# Optional subnet of the virtual network.
+#ex_subnet = default
+
+# Node tags
+tag_arvados-class = dynamic-compute
+tag_cluster = zyxwv
+
+# The API server to ping.
+ping_host = hostname:port
+
+[Size Standard_D3]
+# You can define any number of Size sections to list Azure sizes you're
+# willing to use.  The Node Manager should boot the cheapest size(s) that
+# can run jobs in the queue (N.B.: defining more than one size has not been
+# tested yet).
+# Each size section MUST define the number of cores available in this
+# size class (since libcloud does not provide any consistent API for exposing
+# this setting).
+# You may also want to define the amount of scratch space (expressed
+# in GB) for Crunch jobs.  You can also override Microsoft's provided
+# data fields by setting the same names here.
+cores = 4
+scratch = 200
index d9fcbcf6bfead2ae176e887ee6a2c3cdc877a169..16df376021cd6593622036256af161432c144aed 100644 (file)
@@ -30,9 +30,12 @@ setup(name='arvados-node-manager',
         'pykka',
         'python-daemon',
         ],
+      dependency_links=[
+          'https://github.com/curoverse/libcloud/archive/apache-libcloud-0.18.1.dev1.zip'
+      ],
       scripts=['bin/arvados-node-manager'],
       test_suite='tests',
-      tests_require=['mock>=1.0'],
+      tests_require=['mock>=1.0', 'apache-libcloud==0.18.1.dev1'],
       zip_safe=False,
       cmdclass={'egg_info': tagger},
       )
index c5eaf7636c7082bb08a8c78fd1485a71a62ace1c..e5e9ee39ac4b671a8ee1af1f951b6e874e06b2f3 100644 (file)
@@ -7,7 +7,7 @@ import os
 loglevel = os.environ.get('ANMTEST_LOGLEVEL', 'CRITICAL')
 logging.basicConfig(level=getattr(logging, loglevel.upper()))
 
-# Set the ANM_TIMEOUT environment variable to the maximum amount of time to
+# Set the ANMTEST_TIMEOUT environment variable to the maximum amount of time to
 # wait for tested actors to respond to important messages.  The default value
 # is very conservative, because a small value may produce false negatives on
 # slower systems.  If you're debugging a known timeout issue, however, you may
index b8cf0ee408130f8203ac1d53d332ffb28d9b5659..c22e7a0e0b8d16a0d55b782df27e1c0bfe3ecacb 100644 (file)
@@ -79,14 +79,16 @@ class ComputeNodeSetupActorTestCase(testutil.ActorTestMixin, unittest.TestCase):
         self.make_mocks(
             arverror.ApiError(httplib2.Response({'status': '500'}), ""))
         self.make_actor()
-        self.setup_actor.stop_if_no_cloud_node()
+        self.assertTrue(
+            self.setup_actor.stop_if_no_cloud_node().get(self.TIMEOUT))
         self.assertTrue(
             self.setup_actor.actor_ref.actor_stopped.wait(self.TIMEOUT))
 
     def test_no_stop_when_cloud_node(self):
         self.make_actor()
         self.wait_for_assignment(self.setup_actor, 'cloud_node')
-        self.setup_actor.stop_if_no_cloud_node().get(self.TIMEOUT)
+        self.assertFalse(
+            self.setup_actor.stop_if_no_cloud_node().get(self.TIMEOUT))
         self.assertTrue(self.stop_proxy(self.setup_actor),
                         "actor was stopped by stop_if_no_cloud_node")
 
@@ -119,6 +121,7 @@ class ComputeNodeShutdownActorMixin(testutil.ActorTestMixin):
         self.shutdowns = testutil.MockShutdownTimer()
         self.shutdowns._set_state(shutdown_open, 300)
         self.cloud_client = mock.MagicMock(name='cloud_client')
+        self.arvados_client = mock.MagicMock(name='arvados_client')
         self.updates = mock.MagicMock(name='update_mock')
         if cloud_node is None:
             cloud_node = testutil.cloud_node_mock()
@@ -133,7 +136,8 @@ class ComputeNodeShutdownActorMixin(testutil.ActorTestMixin):
             testutil.cloud_node_fqdn, self.timer, self.updates,
             self.arvados_node)
         self.shutdown_actor = self.ACTOR_CLASS.start(
-            self.timer, self.cloud_client, monitor_actor, cancellable).proxy()
+            self.timer, self.cloud_client, self.arvados_client, monitor_actor,
+            cancellable).proxy()
         self.monitor_actor = monitor_actor.proxy()
 
     def check_success_flag(self, expected, allow_msg_count=1):
@@ -155,6 +159,31 @@ class ComputeNodeShutdownActorMixin(testutil.ActorTestMixin):
         self.cloud_client.destroy_node.return_value = True
         self.check_success_flag(True)
 
+    def test_arvados_node_cleaned_after_shutdown(self, *mocks):
+        cloud_node = testutil.cloud_node_mock(62)
+        arv_node = testutil.arvados_node_mock(62)
+        self.make_mocks(cloud_node, arv_node)
+        self.make_actor()
+        self.check_success_flag(True, 3)
+        update_mock = self.arvados_client.nodes().update
+        self.assertTrue(update_mock.called)
+        update_kwargs = update_mock.call_args_list[0][1]
+        self.assertEqual(arv_node['uuid'], update_kwargs.get('uuid'))
+        self.assertIn('body', update_kwargs)
+        for clear_key in ['slot_number', 'hostname', 'ip_address',
+                          'first_ping_at', 'last_ping_at']:
+            self.assertIn(clear_key, update_kwargs['body'])
+            self.assertIsNone(update_kwargs['body'][clear_key])
+        self.assertTrue(update_mock().execute.called)
+
+    def test_arvados_node_not_cleaned_after_shutdown_cancelled(self, *mocks):
+        cloud_node = testutil.cloud_node_mock(61)
+        arv_node = testutil.arvados_node_mock(61)
+        self.make_mocks(cloud_node, arv_node, shutdown_open=False)
+        self.make_actor(cancellable=True)
+        self.check_success_flag(False, 2)
+        self.assertFalse(self.arvados_client.nodes().update.called)
+
 
 class ComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
                                        unittest.TestCase):
index 93cc60d4e8c93bb2d124b1bbe05ca722c3736541..ac3ebf0435ac4e8a780cf0399b5015d51001bbba 100644 (file)
@@ -66,3 +66,8 @@ class SLURMComputeNodeShutdownActorTestCase(ComputeNodeShutdownActorMixin,
         self.check_success_flag(False, 2)
         self.check_slurm_got_args(proc_mock, 'NodeName=compute99',
                                   'State=RESUME')
+
+    def test_arvados_node_cleaned_after_shutdown(self, proc_mock):
+        proc_mock.return_value = 'drain\n'
+        super(SLURMComputeNodeShutdownActorTestCase,
+              self).test_arvados_node_cleaned_after_shutdown()
diff --git a/services/nodemanager/tests/test_computenode_driver_azure.py b/services/nodemanager/tests/test_computenode_driver_azure.py
new file mode 100644 (file)
index 0000000..90fea0c
--- /dev/null
@@ -0,0 +1,89 @@
+#!/usr/bin/env python
+
+from __future__ import absolute_import, print_function
+
+import ssl
+import time
+import unittest
+
+import libcloud.common.types as cloud_types
+import mock
+
+import arvnodeman.computenode.driver.azure as azure
+from . import testutil
+
+class AzureComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
+    TEST_CLASS = azure.ComputeNodeDriver
+
+    def new_driver(self, auth_kwargs={}, list_kwargs={}, create_kwargs={}):
+        list_kwargs.setdefault("ex_resource_group", "TestResourceGroup")
+        return super(AzureComputeNodeDriverTestCase, self).new_driver(
+            auth_kwargs, list_kwargs, create_kwargs)
+
+    def test_driver_instantiation(self):
+        kwargs = {'key': 'testkey'}
+        driver = self.new_driver(auth_kwargs=kwargs)
+        self.assertTrue(self.driver_mock.called)
+        self.assertEqual(kwargs, self.driver_mock.call_args[1])
+
+    def test_create_image_loaded_at_initialization(self):
+        get_method = self.driver_mock().get_image
+        get_method.return_value = testutil.cloud_object_mock('id_b')
+        driver = self.new_driver(create_kwargs={'image': 'id_b'})
+        self.assertEqual(1, get_method.call_count)
+
+    def test_create_includes_ping(self):
+        arv_node = testutil.arvados_node_mock(info={'ping_secret': 'ssshh'})
+        arv_node["hostname"] = None
+        driver = self.new_driver()
+        driver.create_node(testutil.MockSize(1), arv_node)
+        create_method = self.driver_mock().create_node
+        self.assertTrue(create_method.called)
+        self.assertIn('ping_secret=ssshh',
+                      create_method.call_args[1].get('ex_tags', {}).get('arv-ping-url', ""))
+
+    def test_name_from_new_arvados_node(self):
+        arv_node = testutil.arvados_node_mock(hostname=None)
+        driver = self.new_driver()
+        self.assertEqual('compute-000000000000063-zzzzz',
+                         driver.arvados_create_kwargs(arv_node)['name'])
+
+    def check_node_tagged(self, cloud_node, expected_tags):
+        tag_mock = self.driver_mock().ex_create_tags
+        self.assertTrue(tag_mock.called)
+        self.assertIs(cloud_node, tag_mock.call_args[0][0])
+        self.assertEqual(expected_tags, tag_mock.call_args[0][1])
+
+    def test_node_create_time(self):
+        refsecs = int(time.time())
+        reftuple = time.gmtime(refsecs)
+        node = testutil.cloud_node_mock()
+        node.extra = {'tags': {
+            'booted_at': time.strftime('%Y-%m-%dT%H:%M:%S.000Z', reftuple)}}
+        self.assertEqual(refsecs, azure.ComputeNodeDriver.node_start_time(node))
+
+    def test_node_fqdn(self):
+        name = 'fqdntest.zzzzz.arvadosapi.com'
+        node = testutil.cloud_node_mock()
+        node.extra = {'tags': {"hostname": name}}
+        self.assertEqual(name, azure.ComputeNodeDriver.node_fqdn(node))
+
+    def test_cloud_exceptions(self):
+        for error in [Exception("test exception"),
+                      IOError("test exception"),
+                      ssl.SSLError("test exception"),
+                      cloud_types.LibcloudError("test exception")]:
+            self.assertTrue(azure.ComputeNodeDriver.is_cloud_exception(error),
+                            "{} not flagged as cloud exception".format(error))
+
+    def test_noncloud_exceptions(self):
+        self.assertFalse(
+            azure.ComputeNodeDriver.is_cloud_exception(ValueError("test error")),
+            "ValueError flagged as cloud exception")
+
+    def test_sync_node(self):
+        arv_node = testutil.arvados_node_mock(1)
+        cloud_node = testutil.cloud_node_mock(2)
+        driver = self.new_driver()
+        driver.sync_node(cloud_node, arv_node)
+        self.check_node_tagged(cloud_node,
+                               {'hostname': 'compute1.zzzzz.arvadosapi.com'})
index 595f1f4c621b94037fc6eb7da50673bc5a14f696..d28a2a6441baedeebe4ebeec6f6861d580146cc5 100644 (file)
@@ -34,7 +34,7 @@ class EC2ComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
         list_method = self.driver_mock().list_images
         list_method.return_value = [testutil.cloud_object_mock(c)
                                     for c in 'abc']
-        driver = self.new_driver(create_kwargs={'image_id': 'b'})
+        driver = self.new_driver(create_kwargs={'image_id': 'id_b'})
         self.assertEqual(1, list_method.call_count)
 
     def test_create_includes_ping_secret(self):
index 465adc5e615df9cee4150f248c20b6064f0bae94..b9d7ee9fd0d27e34b5e1996095abfdd9c5d69219 100644 (file)
@@ -14,6 +14,20 @@ from . import testutil
 class GCEComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
     TEST_CLASS = gce.ComputeNodeDriver
 
+    def setUp(self):
+        super(GCEComputeNodeDriverTestCase, self).setUp()
+        self.driver_mock().list_images.return_value = [
+            testutil.cloud_object_mock('testimage', selfLink='image-link')]
+        self.driver_mock().ex_list_disktypes.return_value = [
+            testutil.cloud_object_mock(name, selfLink=name + '-link')
+            for name in ['pd-standard', 'pd-ssd', 'local-ssd']]
+        self.driver_mock.reset_mock()
+
+    def new_driver(self, auth_kwargs={}, list_kwargs={}, create_kwargs={}):
+        create_kwargs.setdefault('image', 'testimage')
+        return super(GCEComputeNodeDriverTestCase, self).new_driver(
+            auth_kwargs, list_kwargs, create_kwargs)
+
     def test_driver_instantiation(self):
         kwargs = {'user_id': 'foo'}
         driver = self.new_driver(auth_kwargs=kwargs)
@@ -24,7 +38,7 @@ class GCEComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
         image_mocks = [testutil.cloud_object_mock(c) for c in 'abc']
         list_method = self.driver_mock().list_images
         list_method.return_value = image_mocks
-        driver = self.new_driver(create_kwargs={'image': 'B'})
+        driver = self.new_driver(create_kwargs={'image': 'b'})
         self.assertEqual(1, list_method.call_count)
 
     def test_create_includes_ping_secret(self):
@@ -50,6 +64,26 @@ class GCEComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
         self.assertEqual(['testA', 'testB'],
                          self.driver_mock().create_node.call_args[1]['ex_tags'])
 
+    def test_create_with_two_disks_attached(self):
+        driver = self.new_driver(create_kwargs={'image': 'testimage'})
+        driver.create_node(testutil.MockSize(1), testutil.arvados_node_mock())
+        create_disks = self.driver_mock().create_node.call_args[1].get(
+            'ex_disks_gce_struct', [])
+        self.assertEqual(2, len(create_disks))
+        self.assertTrue(create_disks[0].get('autoDelete'))
+        self.assertTrue(create_disks[0].get('boot'))
+        self.assertEqual('PERSISTENT', create_disks[0].get('type'))
+        init_params = create_disks[0].get('initializeParams', {})
+        self.assertEqual('pd-standard-link', init_params.get('diskType'))
+        self.assertEqual('image-link', init_params.get('sourceImage'))
+        # Our node images expect the SSD to be named `tmp` to find and mount it.
+        self.assertEqual('tmp', create_disks[1].get('deviceName'))
+        self.assertTrue(create_disks[1].get('autoDelete'))
+        self.assertFalse(create_disks[1].get('boot', 'unset'))
+        self.assertEqual('SCRATCH', create_disks[1].get('type'))
+        init_params = create_disks[1].get('initializeParams', {})
+        self.assertEqual('local-ssd-link', init_params.get('diskType'))
+
     def test_list_nodes_requires_tags_match(self):
         # A node matches if our list tags are a subset of the node's tags.
         # Test behavior with no tags, no match, partial matches, different
@@ -82,7 +116,7 @@ class GCEComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
         driver = self.new_driver()
         driver.sync_node(cloud_node, arv_node)
         args, kwargs = self.driver_mock().connection.async_request.call_args
-        self.assertEqual('/zones/TESTZONE/instances/2/setMetadata', args[0])
+        self.assertEqual('/zones/testzone/instances/2/setMetadata', args[0])
         for key in ['kind', 'fingerprint']:
             self.assertEqual(start_metadata[key], kwargs['data'][key])
         plain_metadata['hostname'] = 'compute1.zzzzz.arvadosapi.com'
index dc8fdc3f8496b9d90d43fdabca4b922120875a6f..b406f1357671f0efe85813004ff2ddb0629584a1 100644 (file)
@@ -183,6 +183,19 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.last_setup.arvados_node.get.return_value = arv_node
         return self.last_setup
 
+    def test_no_new_node_when_booted_node_not_usable(self):
+        cloud_node = testutil.cloud_node_mock(4)
+        arv_node = testutil.arvados_node_mock(4, crunch_worker_state='down')
+        setup = self.start_node_boot(cloud_node, arv_node)
+        self.daemon.node_up(setup).get(self.TIMEOUT)
+        self.assertEqual(1, self.alive_monitor_count())
+        self.daemon.update_cloud_nodes([cloud_node])
+        self.daemon.update_arvados_nodes([arv_node])
+        self.daemon.update_server_wishlist(
+            [testutil.MockSize(1)]).get(self.TIMEOUT)
+        self.stop_proxy(self.daemon)
+        self.assertEqual(1, self.node_setup.start.call_count)
+
     def test_no_duplication_when_booting_node_listed_fast(self):
         # Test that we don't start two ComputeNodeMonitorActors when
         # we learn about a booting node through a listing before we
@@ -270,6 +283,18 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.stop_proxy(self.daemon)
         self.assertShutdownCancellable(False)
 
+    def test_booted_node_shut_down_when_never_working(self):
+        cloud_node = testutil.cloud_node_mock(4)
+        arv_node = testutil.arvados_node_mock(4, crunch_worker_state='down')
+        setup = self.start_node_boot(cloud_node, arv_node)
+        self.daemon.node_up(setup).get(self.TIMEOUT)
+        self.assertEqual(1, self.alive_monitor_count())
+        self.daemon.update_cloud_nodes([cloud_node])
+        self.daemon.update_arvados_nodes([arv_node]).get(self.TIMEOUT)
+        self.timer.deliver()
+        self.stop_proxy(self.daemon)
+        self.assertShutdownCancellable(False)
+
     def test_node_that_pairs_not_considered_failed_boot(self):
         cloud_node = testutil.cloud_node_mock(3)
         arv_node = testutil.arvados_node_mock(3)
@@ -282,12 +307,42 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
         self.stop_proxy(self.daemon)
         self.assertFalse(self.node_shutdown.start.called)
 
+    def test_node_that_pairs_busy_not_considered_failed_boot(self):
+        cloud_node = testutil.cloud_node_mock(5)
+        arv_node = testutil.arvados_node_mock(5, job_uuid=True)
+        setup = self.start_node_boot(cloud_node, arv_node)
+        self.daemon.node_up(setup).get(self.TIMEOUT)
+        self.assertEqual(1, self.alive_monitor_count())
+        self.daemon.update_cloud_nodes([cloud_node])
+        self.daemon.update_arvados_nodes([arv_node]).get(self.TIMEOUT)
+        self.timer.deliver()
+        self.stop_proxy(self.daemon)
+        self.assertFalse(self.node_shutdown.start.called)
+
     def test_booting_nodes_shut_down(self):
         self.make_daemon(want_sizes=[testutil.MockSize(1)])
         self.daemon.update_server_wishlist([]).get(self.TIMEOUT)
         self.stop_proxy(self.daemon)
         self.assertTrue(self.last_setup.stop_if_no_cloud_node.called)
 
+    def test_all_booting_nodes_tried_to_shut_down(self):
+        size = testutil.MockSize(2)
+        self.make_daemon(want_sizes=[size])
+        self.daemon.max_nodes.get(self.TIMEOUT)
+        setup1 = self.last_setup
+        setup1.stop_if_no_cloud_node().get.return_value = False
+        setup1.stop_if_no_cloud_node.reset_mock()
+        self.daemon.update_server_wishlist([size, size]).get(self.TIMEOUT)
+        self.daemon.max_nodes.get(self.TIMEOUT)
+        self.assertIsNot(setup1, self.last_setup)
+        self.last_setup.stop_if_no_cloud_node().get.return_value = True
+        self.last_setup.stop_if_no_cloud_node.reset_mock()
+        self.daemon.update_server_wishlist([]).get(self.TIMEOUT)
+        self.daemon.max_nodes.get(self.TIMEOUT)
+        self.stop_proxy(self.daemon)
+        self.assertEqual(1, self.last_setup.stop_if_no_cloud_node.call_count)
+        self.assertTrue(setup1.stop_if_no_cloud_node.called)
+
     def test_shutdown_declined_at_wishlist_capacity(self):
         cloud_node = testutil.cloud_node_mock(1)
         size = testutil.MockSize(1)
@@ -384,6 +439,8 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
 
     def test_clean_shutdown_waits_for_node_setup_finish(self):
         new_node = self.start_node_boot()
+        new_node.stop_if_no_cloud_node().get.return_value = False
+        new_node.stop_if_no_cloud_node.reset_mock()
         self.daemon.shutdown().get(self.TIMEOUT)
         self.assertTrue(new_node.stop_if_no_cloud_node.called)
         self.daemon.node_up(new_node).get(self.TIMEOUT)
@@ -393,9 +450,11 @@ class NodeManagerDaemonActorTestCase(testutil.ActorTestMixin,
             self.daemon.actor_ref.actor_stopped.wait(self.TIMEOUT))
 
     def test_wishlist_ignored_after_shutdown(self):
-        size = testutil.MockSize(2)
-        self.make_daemon(want_sizes=[size])
+        new_node = self.start_node_boot()
+        new_node.stop_if_no_cloud_node().get.return_value = False
+        new_node.stop_if_no_cloud_node.reset_mock()
         self.daemon.shutdown().get(self.TIMEOUT)
+        size = testutil.MockSize(2)
         self.daemon.update_server_wishlist([size] * 2).get(self.TIMEOUT)
         self.timer.deliver()
         self.stop_proxy(self.daemon)
index 650a23217afc85ceda317567432d29042414adaa..82d6479e24ae53b33b676637ca772d867326b196 100644 (file)
@@ -34,13 +34,14 @@ def arvados_node_mock(node_num=99, job_uuid=None, age=-1, **kwargs):
     node.update(kwargs)
     return node
 
-def cloud_object_mock(name_id):
+def cloud_object_mock(name_id, **extra):
     # A very generic mock, useful for stubbing libcloud objects we
     # only search for and pass around, like locations, subnets, etc.
     cloud_object = mock.NonCallableMagicMock(['id', 'name'],
                                              name='cloud_object')
-    cloud_object.id = str(name_id)
-    cloud_object.name = cloud_object.id.upper()
+    cloud_object.name = str(name_id)
+    cloud_object.id = 'id_' + cloud_object.name
+    cloud_object.extra = extra
     return cloud_object
 
 def cloud_node_mock(node_num=99, **extra):
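Example use of the extended mock, matching how the GCE driver tests above stub disk types:

    dt = cloud_object_mock('pd-standard', selfLink='pd-standard-link')
    dt.name    # => 'pd-standard'
    dt.id      # => 'id_pd-standard'
    dt.extra   # => {'selfLink': 'pd-standard-link'}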