Merge branch 'master' into 2596-refactor-pipeline-create

author Tom Clegg <tom@curoverse.com>

Thu, 17 Apr 2014 00:38:28 +0000 (20:38 -0400)

committer Tom Clegg <tom@curoverse.com>

Thu, 17 Apr 2014 00:38:28 +0000 (20:38 -0400)
author Tom Clegg <tom@curoverse.com>
Thu, 17 Apr 2014 00:38:28 +0000 (20:38 -0400)
committer Tom Clegg <tom@curoverse.com>
Thu, 17 Apr 2014 00:38:28 +0000 (20:38 -0400)
diff --git a/apps/workbench/app/assets/javascripts/selection.js b/apps/workbench/app/assets/javascripts/selection.js

index 9213b70a712754ebbb4911519b873334ea474c33..d70794dc0a58f41ea71b59b2258934766a824cab 100644 (file)
--- a/apps/workbench/app/assets/javascripts/selection.js
+++ b/apps/workbench/app/assets/javascripts/selection.js
@@ -91,7 +91,7 @@ jQuery(function($){
                  checkboxes[i].checked = false;
              }
          }
-        
+
          $('.remove-selection').on('click', remove_selection_click);
          $('#clear_selections_button').on('click', clear_selections);
      };
@@ -100,7 +100,7 @@ jQuery(function($){
          on('change', '.persistent-selection:checkbox', function(e) {
              //console.log($(this));
              //console.log($(this).val());
-            
+
              var inc = 0;
              if ($(this).is(":checked")) {
                  add_selection($(this).val(), $(this).attr('friendly_name'), $(this).attr('href'), $(this).attr('friendly_type'));
@@ -148,7 +148,10 @@ select_form_sources  = null;
  
                  for (var i = 0; i < lst.length; i++) {
                      if (lst[i].type == type) {
-                        ret.push({text: lst[i].name, value: lst[i].uuid})
+                        var n = lst[i].name;
+                        n = n.replace(/<span[^>]*>/i, "[");
+                        n = n.replace(/<\/span>/i, "]");
+                        ret.push({text: n, value: lst[i].uuid})
                      }
                  }
              }
@@ -169,4 +172,3 @@ select_form_sources  = null;
          return ret;
      };
  })();
-
diff --git a/apps/workbench/app/controllers/collections_controller.rb b/apps/workbench/app/controllers/collections_controller.rb

index f24a77ad1fd126b94e2851bebf9aab292a14db6a..a5ac21595e36e5943a007b64f12b0c156eae54ed 100644 (file)
--- a/apps/workbench/app/controllers/collections_controller.rb
+++ b/apps/workbench/app/controllers/collections_controller.rb
@@ -5,6 +5,7 @@ class CollectionsController < ApplicationController
    def show_pane_list
      %w(Files Attributes Metadata Provenance_graph Used_by JSON API)
    end
+
    def index
      if params[:search].andand.length.andand > 0
        tags = Link.where(any: ['contains', params[:search]])
diff --git a/apps/workbench/app/helpers/application_helper.rb b/apps/workbench/app/helpers/application_helper.rb

index e25bb57578946e9a7969df4a50656205b1ae8d5d..b5d2be8bf4752b3efb8dd22a362690db18b6f32a 100644 (file)
--- a/apps/workbench/app/helpers/application_helper.rb
+++ b/apps/workbench/app/helpers/application_helper.rb
@@ -36,7 +36,7 @@ module ApplicationHelper
          end
        end
      end
-    
+
      return h(n)
        #raw = n.to_s
      #cooked = ''
@@ -73,9 +73,18 @@ module ApplicationHelper
          if opts[:with_class_name]
            link_name = "#{resource_class.to_s}: #{link_name}"
          end
+        if !opts[:no_tags] and resource_class == Collection
+          Link.where(head_uuid: link_uuid, link_class: ["tag", "identifier"]).each do |tag|
+            link_name += ' <span class="label label-info">' + html_escape(tag.name) + '</span>'
+          end
+        end
        end
        style_opts[:class] = (style_opts[:class] || '') + ' nowrap'
-      link_to link_name, { controller: resource_class.to_s.tableize, action: 'show', id: link_uuid }, style_opts
+      if opts[:no_link]
+        raw(link_name)
+      else
+        link_to raw(link_name), { controller: resource_class.to_s.tableize, action: 'show', id: link_uuid }, style_opts
+      end
      else
        attrvalue
      end
@@ -174,19 +183,32 @@ module ApplicationHelper
      end
  
      items = []
+    attrtext = attrvalue
      if dataclass and dataclass.is_a? Class
        if attrvalue and !attrvalue.empty?
-        items.append({name: attrvalue, uuid: attrvalue, type: dataclass.to_s})
+        Link.where(head_uuid: attrvalue, link_class: ["tag", "identifier"]).each do |tag|
+          attrtext += " [#{tag.name}]"
+        end
+        items.append({name: attrtext, uuid: attrvalue, type: dataclass.to_s})
        end
        #dataclass.where(uuid: attrvalue).each do |item|
        #  items.append({name: item.uuid, uuid: item.uuid, type: dataclass.to_s})
        #end
+      itemuuids = []
        dataclass.limit(10).each do |item|
+        itemuuids << item.uuid
          items.append({name: item.uuid, uuid: item.uuid, type: dataclass.to_s})
        end
+      Link.where(head_uuid: itemuuids, link_class: ["tag", "identifier"]).each do |tag|
+        items.each do |item|
+          if item.uuid == tag.head_uuid
+            item.name += ' [' + tag.name + ']'
+          end
+        end
+      end
      end
  
-    lt = link_to attrvalue, '#', {
+    lt = link_to attrtext, '#', {
        "data-emptytext" => "none",
        "data-placement" => "bottom",
        "data-type" => datatype,
@@ -210,6 +232,6 @@ module ApplicationHelper
  
      lt += raw("</script>")
  
-    lt 
+    lt
    end
  end
diff --git a/apps/workbench/app/views/application/_selection_checkbox.html.erb b/apps/workbench/app/views/application/_selection_checkbox.html.erb

index 4d47d892c56817386982489119855e25146c99bb..7af11752c8e2540e439c0fdc1d7ce4d906b78080 100644 (file)
--- a/apps/workbench/app/views/application/_selection_checkbox.html.erb
+++ b/apps/workbench/app/views/application/_selection_checkbox.html.erb
@@ -1,8 +1,19 @@
  <%if object %>
+  <% fn = if defined? friendly_name
+            friendly_name
+          else
+            link_to_if_arvados_object object, {no_link: true}
+          end
+     %>
+  <% # This 'fn' string may contain embedded HTML which is already marked html_safe.
+     # Since we are putting it into a tag attribute, we need to copy into an
+     # unsafe string so that rails will escape it for us.
+     fn = String.new fn %>
  <%= check_box_tag 'uuids[]', object.uuid, false, {
-  :class => 'persistent-selection', 
-  :friendly_type => object.class.name,
-  :friendly_name => object.selection_label,
-  :href => "#{url_for controller: object.class.name.tableize, action: 'show', id: object.uuid }" 
+      :class => 'persistent-selection',
+      :friendly_type => object.class.name,
+      :friendly_name => fn,
+      :href => "#{url_for controller: object.class.name.tableize, action: 'show', id: object.uuid }",
+      :title => "Click to add this item to your selection list"
  } %>
  <% end %>
diff --git a/apps/workbench/app/views/collections/_index_tbody.html.erb b/apps/workbench/app/views/collections/_index_tbody.html.erb

index 96b73979eb60a645b69a9be0d67c4fa91d553617..75745376c0bb5c560dda892a016fd4c917594a66 100644 (file)
--- a/apps/workbench/app/views/collections/_index_tbody.html.erb
+++ b/apps/workbench/app/views/collections/_index_tbody.html.erb
@@ -2,10 +2,14 @@
  
  <tr class="collection" data-object-uuid="<%= c.uuid %>">
    <td>
-    <%= render :partial => "selection_checkbox", :locals => {:object => c} %>
+    <% friendly_name = c.friendly_link_name  %>
+    <% @collection_info[c.uuid][:tag_links].each do |tag_link| %>
+      <% friendly_name += raw(" <span class='label label-info'>#{tag_link.name}</span>") %>
+    <% end %>
+    <%= render :partial => "selection_checkbox", :locals => {:object => c, :friendly_name => friendly_name} %>
    </td>
    <td>
-    <%= link_to_if_arvados_object c.uuid %>
+    <%= link_to_if_arvados_object c.uuid, {:no_tags => true } %>
    </td>
    <td>
      <% i = 0 %>
diff --git a/apps/workbench/app/views/collections/_show_files.html.erb b/apps/workbench/app/views/collections/_show_files.html.erb

index 956958eddb9fe0785946eb7a36bd0f866e00996e..34d24411f6d3ee218f98345177bd5c0a0db96908 100644 (file)
--- a/apps/workbench/app/views/collections/_show_files.html.erb
+++ b/apps/workbench/app/views/collections/_show_files.html.erb
@@ -24,10 +24,11 @@
        <tr>
          <td>
            <%= check_box_tag 'uuids[]', @object.uuid+'/'+file_path, false, {
-                :class => 'persistent-selection', 
+                :class => 'persistent-selection',
                  :friendly_type => "File",
                  :friendly_name => "#{@object.uuid}/#{file_path}",
-                :href => "#{url_for controller: 'collections', action: 'show', id: @object.uuid }/#{file_path}" 
+                :href => "#{url_for controller: 'collections', action: 'show', id: @object.uuid }/#{file_path}",
+                :title => "Click to add this item to your selection list"
                } %>
          </td>
          <td>
diff --git a/apps/workbench/app/views/layouts/application.html.erb b/apps/workbench/app/views/layouts/application.html.erb

index 724fa9bd9ffb52a0d71a961041fdf1181eaea353..e23265adb45acff3979ccccf11ee586e0ba88694 100644 (file)
--- a/apps/workbench/app/views/layouts/application.html.erb
+++ b/apps/workbench/app/views/layouts/application.html.erb
@@ -85,7 +85,7 @@
                <span class="glyphicon glyphicon-arrow-right"></span>
              </li>
              <li>
-              <%= link_to controller.breadcrumb_page_name, request.fullpath %>
+              <%= link_to_if_arvados_object @object %>
              </li>
              <li style="padding: 14px 0 14px">
                <%= form_tag do |f| %>
diff --git a/apps/workbench/test/integration/users_test.rb b/apps/workbench/test/integration/users_test.rb

index 22b92c02e1d23dbc55740f8c6209744a5a3a30df..df7d2453a7396ea568c6ffddbcdcaaf4a7ed3bd9 100644 (file)
--- a/apps/workbench/test/integration/users_test.rb
+++ b/apps/workbench/test/integration/users_test.rb
@@ -122,7 +122,7 @@ class UsersTest < ActionDispatch::IntegrationTest
        click_button "Submit"
      end
  
-    sleep(0.1)
+    sleep(1)
      assert page.has_text? 'modified_by_client_uuid'
  
      click_link 'Metadata'
diff --git a/doc/_config.yml b/doc/_config.yml

index 1fadc55d7d18ca90eaf8062cec9e157074480c45..714c66a81fe23729020b67c732a1a2b93ba368e9 100644 (file)
--- a/doc/_config.yml
+++ b/doc/_config.yml
@@ -6,7 +6,7 @@
  # You can also set these on the command line:
  # $ rake generate baseurl=/example arvados_api_host=example.com
  
-baseurl: 
+baseurl:
  arvados_api_host: localhost
  arvados_workbench_host: localhost
  
@@ -40,7 +40,6 @@ navbar:
      - Reference:
        - user/reference/api-tokens.html.textile.liquid
        - user/reference/sdk-cli.html.textile.liquid
-      - user/reference/job-and-pipeline-reference.html.textile.liquid
      - Arvados License:
        - user/copying/copying.html.textile.liquid
        - user/copying/agpl-3.0.html
diff --git a/doc/api/methods/jobs.html.textile.liquid b/doc/api/methods/jobs.html.textile.liquid

index a84d70abbfc97da020d86a2b92cc2b4b258660fa..351a77792fb838987b57638b10ce98ee8a2ae961 100644 (file)
--- a/doc/api/methods/jobs.html.textile.liquid
+++ b/doc/api/methods/jobs.html.textile.liquid
@@ -21,7 +21,7 @@ table(table table-bordered table-condensed).
  |_. Argument |_. Type |_. Description |_. Location |_. Example |
  {background:#ccffcc}.|uuid|string||path||
  
-h2. create
+h2(#create). create
  
  Create a new Job.
  
@@ -29,7 +29,97 @@ Arguments:
  
  table(table table-bordered table-condensed).
  |_. Argument |_. Type |_. Description |_. Location |_. Example |
-|job|object||query||
+{background:#ccffcc}.|job|object||query||
+
+Attributes of 'job' parameter:
+
+table(table table-bordered table-condensed).
+|_. Attribute               |_. Type|_. Accepted values                            |_. Description|
+{background:#ccffcc}.|script                 |string     |filename                                      |The actual script that will be run by crunch.  Must be the name of an executable file in the crunch_scripts/ directory at the git revision specified by script_version.|
+{background:#ccffcc}.|script_version         |string     |git branch, tag, or commit hash               |The version of code to run, which must be available in the specified repository.|
+{background:#ccffcc}.|repository             |string     |name of git repository hosted by Arvados      |The repository to search for script_version.|
+{background:#ccffcc}.|script_parameters      |object     |any JSON object                               |The input parameters for the job, with the parameter names as keys mapping to parameter values.|
+|minimum_script_version |string     |git branch, tag, or commit hash              |The minimum acceptable script version (earliest ancestor) to consider when deciding whether to re-use a past job.|
+|exclude_script_versions|array of strings|git branch, tag, or commit hash|Script versions to exclude when deciding whether to re-use a past job.|
+|nondeterministic       |boolean    |                                              |If true, never re-use a past job, and flag this job so it will never be considered for re-use.|
+|no_reuse               |boolean    |                                              |If true, do not re-use a past job, but this job may be re-used.|
+
+When a job is executed, the 'script_version' field is resolved to an exact git revision and the git hash for that revision is recorded in 'script_version'.  If 'script_version' can't be resolved, the job submission will be rejected.
+
+h3. Reusing jobs
+
+Because Arvados records the exact version of the script, input parameters, and runtime environment [1] that were used to run the job, if the script is deterministic (meaning that the same code version is guaranteed to produce the same outputs from the same inputs) then it is possible to re-use the results of past jobs, and avoid re-running the computation to save time.  Arvados uses the following algorithm to determine if a past job can be re-used:
+
+notextile. <div class="spaced-out">
+
+# If either 'nondeterministic' or 'no_reuse' is true, always create a new job.
+# Find a list of acceptable values for 'script_version'.  If 'minimum_script_version' is specified, this is the set of all revisions in the git commit graph between 'minimum_script_version' and 'script_version' (inclusive) [2].  If 'minimum_script_version' is not specified, only 'script_version' is added to the list.  If 'exclude_script_versions' is specified, the listed versions are excluded from the list.
+# Select jobs that have the same 'script' and 'script_parameters' attributes, and whose 'script_version' attribute is in the list of acceptable versions.  Exclude jobs that failed or set 'nondeterministic' to true.
+# If there is more than one candidate job, check that all selected past jobs actually did produce the same output.
+# If everything passed, re-use one of the selected past jobs (if there is more than one match, which job will be returned is undefined).  Otherwise create a new job.
+
+fn1. As of this writing, versioning the runtime environment is still under development.
+
+fn2. This may include parallel branches if there is more than one path between 'minimum_script_version' and 'script_version' in the git commit graph.  Use 'exclude_script_versions' to blacklist specific versions.
+
+</div>
+
+h3. Examples
+
+Run the script "crunch_scripts/hash.py" in the repository "you" using the "master" branch head.  Arvados is allowed to re-use a previous job if the script_version of the past job is the same as the "master" branch head (i.e., there have not been any subsequent commits to "master").
+
+<notextile><pre>
+{
+  "script": "hash.py",
+  "repository": "<b>you</b>",
+  "script_version": "master",
+  "script_parameters": {
+    "input": "c1bad4b39ca5a924e481008009d94e32+210"
+  }
+}
+</pre></notextile>
+
+Run using exactly the version "d00220fb38d4b85ca8fc28a8151702a2b9d1dec5". Arvados is allowed to re-use a previous job if the "script_version" of that job is also "d00220fb38d4b85ca8fc28a8151702a2b9d1dec5".
+
+<notextile><pre>
+{
+  "script": "hash.py",
+  "repository": "<b>you</b>",
+  "script_version": "d00220fb38d4b85ca8fc28a8151702a2b9d1dec5",
+  "script_parameters": {
+    "input": "c1bad4b39ca5a924e481008009d94e32+210"
+  }
+}
+</pre></notextile>
+
+Arvados is allowed to re-use a previous job if the "script_version" of the past job is between "earlier_version_tag" and the head of the "master" branch (inclusive), but not "blacklisted_version_tag".  If there are no previous jobs, run the job using the head of the "master" branch as specified in "script_version".
+
+<notextile><pre>
+{
+  "script": "hash.py",
+  "repository": "<b>you</b>",
+  "minimum_script_version": "earlier_version_tag",
+  "script_version": "master",
+  "exclude_script_versions": ["blacklisted_version_tag"],
+  "script_parameters": {
+    "input": "c1bad4b39ca5a924e481008009d94e32+210"
+  }
+}
+</pre></notextile>
+
+Run the script "crunch_scripts/monte-carlo.py" in the repository "you" using the "master" branch head.  Because it is marked as "nondeterministic", never re-use previous jobs, and never re-use this job.
+
+<notextile><pre>
+{
+  "script": "monte-carlo.py",
+  "repository": "<b>you</b>",
+  "script_version": "master",
+  "nondeterministic": true,
+  "script_parameters": {
+    "input": "c1bad4b39ca5a924e481008009d94e32+210"
+  }
+}
+</pre></notextile>
  
  h2. delete
  
diff --git a/doc/api/schema/Job.html.textile.liquid b/doc/api/schema/Job.html.textile.liquid

index 54e1b27713c29e6946021ae8a60f6f2008989024..55fb234580bb998973f0aa47a307dde336ca1812 100644 (file)
--- a/doc/api/schema/Job.html.textile.liquid
+++ b/doc/api/schema/Job.html.textile.liquid
@@ -19,21 +19,6 @@ See "REST methods for working with Arvados resources":{{site.baseurl}}/api/metho
  
  API endpoint base: @https://{{ site.arvados_api_host }}/arvados/v1/jobs@
  
-h3. Additional parameters for "Create" method
-
-table(table table-bordered table-condensed).
-|_. Attribute               |_. Type|_. Accepted values                            |_. Required|_. Description|
-|script                 |string     |filename                                      |yes        |The actual script that will be run by crunch.  Must be the name of an executable file in the crunch_scripts/ directory at the git revision specified by script_version.|
-|script_version         |string     |git branch, tag, or version hash              |yes        |The code version to run, which is available in the specified repository.  May be a git hash or tag to specify an exact version, or a branch.  If it is a branch, use the branch head.|
-|repository             |string     |name of git repository hosted by Arvados      |yes        |The repository to search for script_version.|
-|script_parameters      |object     |any JSON object                               |yes        |The input parameters for the job, with the parameter names as keys mapping to parameter values.|
-|minimum_script_version |string     |git branch, tag, or version hash              |no         |The minimum acceptable script version when deciding whether to re-use a past job.|
-|exclude_script_versions|array of strings|git branch, tag, or version hash|no         |Script versions to exclude when deciding whether to re-use a past job.|
-|nondeterministic       |boolean    |                                              |no         |If true, never re-use a past job, and flag this job so it will never be considered for re-use.|
-|no_reuse               |boolean    |                                              |no         |If true, do not re-use a past job, but this job may be re-used.|
-
-See the "job and pipeline reference":{{site.baseurl}}/user/reference/job-and-pipeline-reference.html for more information.
-
  h3. Queue
  
  <pre>
@@ -59,7 +44,7 @@ table(table table-bordered table-condensed).
  |runtime_constraints{}|list|Constraints that must be satisfied by the job/task scheduler in order to run the job.|See below.|
  |script|string|Name of crunch script in @/crunch_scripts/@||
  |script_parameters{}|list|Parameters passed to MapReduce script||
-|script_version|string|The git ref of the the git commit used to run the job.|  When the job starts, Arvados updates this field to the precise git commit hash used by the job.|
+|script_version|string|git branch, tag, or commit hash.|Before the job starts, this is the git branch, tag, or hash supplied by the user.  When the job starts, Arvados updates this field to the full 40-character git hash of the actual commit used by the job.|
  |repository             |string     |The repository from which the script_version will be fetched.|
  |cancelled_by_client_uuid|string|API client ID|Is null if job has not been cancelled|
  |cancelled_by_user_uuid|string|Authenticated user ID|Is null if job has not been cancelled|
diff --git a/doc/api/schema/PipelineTemplate.html.textile.liquid b/doc/api/schema/PipelineTemplate.html.textile.liquid

index c8681c3565f95b74a0fa801483ea1493ef608ae9..6060e598ecf285c197ff771258d768979f6e4446 100644 (file)
--- a/doc/api/schema/PipelineTemplate.html.textile.liquid
+++ b/doc/api/schema/PipelineTemplate.html.textile.liquid
@@ -3,12 +3,135 @@ layout: default
  navsection: api
  navmenu: Schema
  title: PipelineTemplate
-
  ...
  
+Pipelines consist of a set of "components".  Each component is an Arvados job submission.  "Parameters for job submissions are described on the job method page.":{{site.baseurl}}/api/methods/jobs.html#create
+
+table(table table-bordered table-condensed).
+|_. Attribute    |_. Type |_. Accepted values                           |_. Required|_. Description|
+|name            |string  |any                                          |yes        |The human-readable name of the pipeline template.|
+|components      |object  |JSON object containing job submission objects|yes        |The component jobs that make up the pipeline, with the component name as the key. |
+
+h3. Script parameters
+
+When used in a pipeline, each parameter in the 'script_parameters' attribute of a component job can specify that the input parameter must be supplied by the user, or that the input parameter should be linked to the output of another component.  To do this, the value of the parameter should be a JSON object containing one or more of the following attributes:
+
+table(table table-bordered table-condensed).
+|_. Attribute    |_. Type |_. Accepted values                               |_. Description|
+|default         |any     |any                                              |The default value for this parameter.|
+|required        |boolean |true or false                                    |Specifies whether the parameter is required to have a value or not.|
+|dataclass       |string  |One of 'Collection', 'File' [1], 'number', or 'text' |Data type of this parameter.|
+|output_of       |string  |the name of another component in the pipeline    |Specifies that the value of this parameter should be set to the 'output' attribute of the job that corresponds to the specified component.|
+
+The 'output_of' parameter is especially important, as this is how components are actually linked together to form a pipeline.  Component jobs that depend on the output of other components do not run until the parent job completes and has produced output.  If the parent job fails, the entire pipeline fails.
+
+fn1. The 'File' type refers to a specific file within a Keep collection in the form 'collection_hash/filename', for example '887cd41e9c613463eab2f0d885c6dd96+83/bob.txt'.
+
+h3. Examples
+
+This is a pipeline named "Filter md5 hash values" with two components, "do_hash" and "filter".  The "input" script parameter of the "do_hash" component is required to be filled in by the user, and the expected data type is "Collection".  This also specifies that the "input" script parameter of the "filter" component is the output of "do_hash", so "filter" will not run until "do_hash" completes successfully.  When the pipeline runs, past jobs that meet the re-use criteria described on the "job method page":{{site.baseurl}}/api/methods/jobs.html#create may be substituted for either or both components to avoid redundant computation.
+
+<notextile><pre>
+{
+  "name": "Filter md5 hash values",
+  "components": {
+    "do_hash": {
+      "script": "hash.py",
+      "repository": "<b>you</b>",
+      "script_version": "master",
+      "script_parameters": {
+        "input": {
+          "required": true,
+          "dataclass": "Collection"
+        }
+      }
+    },
+    "filter": {
+      "script": "0-filter.py",
+      "repository": "<b>you</b>",
+      "script_version": "master",
+      "script_parameters": {
+        "input": {
+          "output_of": "do_hash"
+        }
+      }
+    }
+  }
+}
+</pre></notextile>
+
+This pipeline consists of three components.  The components "thing1" and "thing2" both depend on "cat_in_the_hat".  Once the "cat_in_the_hat" job is complete, both "thing1" and "thing2" can run in parallel, because they do not depend on each other.
+
+<notextile><pre>
+{
+  "name": "Wreck the house",
+  "components": {
+    "cat_in_the_hat": {
+      "script": "cat.py",
+      "repository": "<b>you</b>",
+      "script_version": "master",
+      "script_parameters": { }
+    },
+    "thing1": {
+      "script": "thing1.py",
+      "repository": "<b>you</b>",
+      "script_version": "master",
+      "script_parameters": {
+        "input": {
+          "output_of": "cat_in_the_hat"
+        }
+      }
+    },
+    "thing2": {
+      "script": "thing2.py",
+      "repository": "<b>you</b>",
+      "script_version": "master",
+      "script_parameters": {
+        "input": {
+          "output_of": "cat_in_the_hat"
+        }
+      }
+    }
+  }
+}
+</pre></notextile>
+
+This pipeline consists of three components.  The component "cleanup" depends on "thing1" and "thing2".  Both "thing1" and "thing2" are started immediately and can run in parallel, because they do not depend on each other, but "cleanup" cannot begin until both "thing1" and "thing2" have completed.
+
+<notextile><pre>
+{
+  "name": "Clean the house",
+  "components": {
+    "thing1": {
+      "script": "thing1.py",
+      "repository": "<b>you</b>",
+      "script_version": "master",
+      "script_parameters": { }
+    },
+    "thing2": {
+      "script": "thing2.py",
+      "repository": "<b>you</b>",
+      "script_version": "master",
+      "script_parameters": { }
+    },
+    "cleanup": {
+      "script": "cleanup.py",
+      "repository": "<b>you</b>",
+      "script_version": "master",
+      "script_parameters": {
+        "mess1": {
+          "output_of": "thing1"
+        },
+        "mess2": {
+          "output_of": "thing2"
+        }
+      }
+    }
+  }
+}
+</pre></notextile>
  
  
-A **PipelineTemplate** represents...
  
  h2. Methods
  
diff --git a/doc/user/reference/job-and-pipeline-reference.html.textile.liquid b/doc/user/reference/job-and-pipeline-reference.html.textile.liquid

deleted file mode 100644 (file)

index c3fed61..0000000
--- a/doc/user/reference/job-and-pipeline-reference.html.textile.liquid
+++ /dev/null
@@ -1,223 +0,0 @@
----
-layout: default
-navsection: userguide
-title: "Job and Pipeline Reference"
-...
-
-h2. Submitting jobs
-
-table(table table-bordered table-condensed).
-|_. Attribute               |_. Type|_. Accepted values                            |_. Required|_. Description|
-|script                 |string     |filename                                      |yes        |The actual script that will be run by crunch.  Must be the name of an executable file in the crunch_scripts/ directory at the git revision specified by script_version.|
-|script_version         |string     |git branch, tag, or version hash              |yes        |The code version to run, which is available in the specified repository.  May be a git hash or tag to specify an exact version, or a branch.  If it is a branch, use the branch head.|
-|repository             |string     |name of git repository hosted by Arvados      |yes        |The repository to search for script_version.|
-|script_parameters      |object     |any JSON object                               |yes        |The input parameters for the job, with the parameter names as keys mapping to parameter values.|
-|minimum_script_version |string     |git branch, tag, or version hash              |no         |The minimum acceptable script version when deciding whether to re-use a past job.|
-|exclude_script_versions|array of strings|git branch, tag, or version hash|no         |Script versions to exclude when deciding whether to re-use a past job.|
-|nondeterministic       |boolean    |                                              |no         |If true, never re-use a past job, and flag this job so it will never be considered for re-use.|
-|no_reuse               |boolean    |                                              |no         |If true, do not re-use a past job, but this job may be re-used.|
-
-When a job is executed, the 'script_version' field is resolved to an exact git revision and the git hash for that revision is recorded in 'script_version'.  If 'script_version' can't be resolved, the job submission will be rejected.
-
-h3. Reusing jobs
-
-Because Arvados records the exact version of the script, input parameters, and runtime environment [1] that was used to run the job, if the script is deterministic (meaning that the same code version is guaranteed to produce the same outputs from the same inputs) then it is possible to re-use the results of past jobs, and avoid re-running the computation to save time.  Arvados uses the following algorithm to determine if a past job can be re-used:
-
-notextile. <div class="spaced-out">
-
-# If 'nondeterministic' or 'no_reuse' are true, always create a new job.
-# Find a list of acceptable values for 'script_version'.  If 'minimum_script_version' is specified, this is the set of all revisions in the git commit graph between 'minimum_script_version' and 'script_version' (inclusive) [2].  If 'minimum_script_version' is not specified, only 'script_version' is added to the list.  If 'exclude_script_versions' is specified, the listed versions are excluded from the list.
-# Select jobs have the same 'script' and 'script_parameters' attributes, and where the 'script_version' attribute is in the list of acceptable versions.  Exclude jobs that failed or set 'nondeterministic' to true.
-# If there is more than one candidate job, check that all selected past jobs actually did produce the same output.
-# If everything passed, re-use one of the selected past jobs (if there is more than one match, which job will be returned is undefined).  Otherwise create a new job.
-
-fn1. As of this writing, versioning the runtime environment is still under development.
-
-fn2. This may include parallel branches if there is more than one path between 'minimum_script_version' and 'script_version' in the git commit graph.  Use 'exclude_script_versions' to blacklist specific versions.
-
-</div>
-
-h3. Examples
-
-Run the script "crunch_scripts/hash.py" in the repository "you" using the "master" branch head.  Arvados is allowed to re-use a previous job if the script_version of the past job is the same as the "master" branch head (i.e., there have not been any subsequent commits to "master").
-
-<notextile><pre>
-{
-  "script": "hash.py",
-  "repository": "<b>you</b>",
-  "script_version": "master",
-  "script_parameters": {
-    "input": "c1bad4b39ca5a924e481008009d94e32+210"
-  }
-}
-</pre></notextile>
-
-Run using exactly the version "d00220fb38d4b85ca8fc28a8151702a2b9d1dec5". Arvados is allowed to re-use a previous job if the "script_version" of that job is also "d00220fb38d4b85ca8fc28a8151702a2b9d1dec5".
-
-<notextile><pre>
-{
-  "script": "hash.py",
-  "repository": "<b>you</b>",
-  "script_version": "d00220fb38d4b85ca8fc28a8151702a2b9d1dec5",
-  "script_parameters": {
-    "input": "c1bad4b39ca5a924e481008009d94e32+210"
-  }
-}
-</pre></notextile>
-
-Arvados is allowed to re-use a previous job if the "script_version" of the past job is between "earlier_version_tag" and the head of the "master" branch (inclusive), but not "blacklisted_version_tag".  If there are no previous jobs, run the job using the head of the "master" branch as specified in "script_version".
-
-<notextile><pre>
-{
-  "script": "hash.py",
-  "repository": "<b>you</b>",
-  "minimum_script_version": "earlier_version_tag",
-  "script_version": "master",
-  "exclude_script_versions": ["blacklisted_version_tag"],
-  "script_parameters": {
-    "input": "c1bad4b39ca5a924e481008009d94e32+210"
-  }
-}
-</pre></notextile>
-
-Run the script "crunch_scripts/monte-carlo.py" in the repository "you" using the "master" branch head.  Because it is marked as "nondeterministic", never re-use previous jobs, and never re-use this job.
-
-<notextile><pre>
-{
-  "script": "monte-carlo.py",
-  "repository": "<b>you</b>",
-  "script_version": "master",
-  "nondeterministic": true,
-  "script_parameters": {
-    "input": "c1bad4b39ca5a924e481008009d94e32+210"
-  }
-}
-</pre></notextile>
-
-h2. Pipelines
-
-Pipelines consist of a set of "components".  Each component is an Arvados job submission, so when a component job is submitted, Arvados may re-use past jobs based on the rules described above.
-
-table(table table-bordered table-condensed).
-|_. Attribute    |_. Type |_. Accepted values                           |_. Required|_. Description|
-|name            |string  |any                                          |yes        |The human-readable name of the pipeline template.|
-|components      |object  |JSON object containing job submission objects|yes        |The component jobs that make up the pipeline, with the component name as the key. |
-
-h3. Script parameters
-
-When used in a pipeline, each parameter in the 'script_parameters' attribute of a component job can specify that the input parameter must be supplied by the user, or the input parameter should be linked to the output of another component.  To do this, the value of the parameter should be JSON object containing one of the following attributes:
-
-table(table table-bordered table-condensed).
-|_. Attribute    |_. Type |_. Accepted values                               |_. Description|
-|default         |any     |any                                              |The default value for this parameter.|
-|required        |boolean |true or false                                    |Specifies whether the parameter is required to have a value or not.|
-|dataclass       |string  |One of 'Collection', 'File' [3], 'number', or 'text' |Data type of this parameter.|
-|output_of       |string  |the name of another component in the pipeline    |Specifies that the value of this parameter should be set to the 'output' attribute of the job that corresponds to the specified component.|
-
-The 'output_of' parameter is especially important, as this is how components are actually linked together to form a pipeline.  Component jobs that depend on the output of other components do not run until the parent job completes and has produced output.  If the parent job fails, the entire pipeline fails.
-
-fn3. The 'File' type refers to a specific file within a Keep collection in the form 'collection_hash/filename', for example '887cd41e9c613463eab2f0d885c6dd96+83/bob.txt'.
-
-h3. Examples
-
-This is a pipeline named "Filter md5 hash values" with two components, "do_hash" and "filter".  The "input" script parameter of the "do_hash" component is required to be filled in by the user, and the expected data type is "Collection".  This also specifies that the "input" script parameter of the "filter" component is the output of "do_hash", so "filter" will not run until "do_hash" completes successfully.  When the pipeline runs, past jobs that meet the criteria described above may be substituted for either or both components to avoid redundant computation.
-
-<notextile><pre>
-{
-  "name": "Filter md5 hash values",
-  "components": {
-    "do_hash": {
-      "script": "hash.py",
-      "repository": "<b>you</b>",
-      "script_version": "master",
-      "script_parameters": {
-        "input": {
-          "required": true,
-          "dataclass": "Collection"
-        }
-      },
-    },
-    "filter": {
-      "script": "0-filter.py",
-      "repository": "<b>you</b>",
-      "script_version": "master",
-      "script_parameters": {
-        "input": {
-          "output_of": "do_hash"
-        }
-      },
-    }
-  }
-}
-</pre></notextile>
-
-This pipeline consists of three components.  The components "thing1" and "thing2" both depend on "cat_in_the_hat".  Once the "cat_in_the_hat" job is complete, both "thing1" and "thing2" can run in parallel, because they do not depend on each other.
-
-<notextile><pre>
-{
-  "name": "Wreck the house",
-  "components": {
-    "cat_in_the_hat": {
-      "script": "cat.py",
-      "repository": "<b>you</b>",
-      "script_version": "master",
-      "script_parameters": { }
-    },
-    "thing1": {
-      "script": "thing1.py",
-      "repository": "<b>you</b>",
-      "script_version": "master",
-      "script_parameters": {
-        "input": {
-          "output_of": "cat_in_the_hat"
-        }
-      },
-    },
-    "thing2": {
-      "script": "thing2.py",
-      "repository": "<b>you</b>",
-      "script_version": "master",
-      "script_parameters": {
-        "input": {
-          "output_of": "cat_in_the_hat"
-        }
-      },
-    },
-  }
-}
-</pre></notextile>
-
-This pipeline consists of three components.  The component "cleanup" depends on "thing1" and "thing2".  Both "thing1" and "thing2" are started immediately and can run in parallel, because they do not depend on each other, but "cleanup" cannot begin until both "thing1" and "thing2" have completed.
-
-<notextile><pre>
-{
-  "name": "Clean the house",
-  "components": {
-    "thing1": {
-      "script": "thing1.py",
-      "repository": "<b>you</b>",
-      "script_version": "master",
-      "script_parameters": { }
-    },
-    "thing2": {
-      "script": "thing2.py",
-      "repository": "<b>you</b>",
-      "script_version": "master",
-      "script_parameters": { }
-    },
-    "cleanup": {
-      "script": "cleanup.py",
-      "repository": "<b>you</b>",
-      "script_version": "master",
-      "script_parameters": {
-        "mess1": {
-          "output_of": "thing1"
-        },
-        "mess2": {
-          "output_of": "thing2"
-        }
-      }
-    }
-  }
-}
-</pre></notextile>
diff --git a/doc/user/tutorials/tutorial-firstscript.html.textile.liquid b/doc/user/tutorials/tutorial-firstscript.html.textile.liquid

index 36187d2dcaf9a03ddbbb3761a0b2267d9c88b024..d0f50f2108eec17571b9f74b7dd5394f51ca7591 100644 (file)
--- a/doc/user/tutorials/tutorial-firstscript.html.textile.liquid
+++ b/doc/user/tutorials/tutorial-firstscript.html.textile.liquid
@@ -139,3 +139,5 @@ Now, use @arv pipeline_template create@ to register your pipeline template in Ar
  </notextile>
  
  Your new pipeline template will appear on the Workbench "Compute %(rarr)&rarr;% Pipeline&nbsp;templates":https://{{ site.arvados_workbench_host }}/pipeline_instances page.  You can run the "pipeline using Workbench":tutorial-pipeline-workbench.html.
+
+For more information and examples for writing pipelines, see the "pipeline template reference":{{site.baseurl}}/api/schema/PipelineTemplate.html
diff --git a/doc/user/tutorials/tutorial-new-pipeline.html.textile.liquid b/doc/user/tutorials/tutorial-new-pipeline.html.textile.liquid

index a832434287aad6189796c7906e5e06bb20a32162..2acb34d106ade93a8877137884c7bc02932d0950 100644 (file)
--- a/doc/user/tutorials/tutorial-new-pipeline.html.textile.liquid
+++ b/doc/user/tutorials/tutorial-new-pipeline.html.textile.liquid
@@ -76,3 +76,5 @@ Now, use @arv pipeline_template create@ to register your pipeline template in Ar
  </notextile>
  
  Your new pipeline template will appear on the Workbench "Compute %(rarr)&rarr;% Pipeline&nbsp;templates":https://{{ site.arvados_workbench_host }}/pipeline_instances page.
+
+For more information and examples for writing pipelines, see the "pipeline template reference":{{site.baseurl}}/api/schema/PipelineTemplate.html
diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job

index 25c1ee0857e521a898fe7711af01e44db7ee9ccf..48a6c9dea7f7f5be8fa20367e46e29de969f5b62 100755 (executable)
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -95,6 +95,15 @@ $ENV{"CRUNCH_INSTALL"} = "$ENV{CRUNCH_TMP}/opt";
  $ENV{"CRUNCH_WORK"} = $ENV{"JOB_WORK"}; # deprecated
  mkdir ($ENV{"JOB_WORK"});
  
+my $arv_cli;
+
+if (defined $ENV{"ARV_CLI"}) {
+  $arv_cli = $ENV{"ARV_CLI"};
+}
+else {
+  $arv_cli = 'arv';
+}
+
  my $force_unlock;
  my $git_dir;
  my $jobspec;
@@ -1074,7 +1083,7 @@ sub fetch_block
    my $hash = shift;
    my ($keep, $child_out, $output_block);
  
-  my $cmd = "arv keep get \Q$hash\E";
+  my $cmd = "$arv_cli keep get \Q$hash\E";
    open($keep, '-|', $cmd) or die "fetch_block: $cmd: $!";
    sysread($keep, $output_block, 64 * 1024 * 1024);
    close $keep;
@@ -1086,7 +1095,7 @@ sub collate_output
    Log (undef, "collate");
  
    my ($child_out, $child_in);
-  my $pid = open2($child_out, $child_in, 'arv', 'keep', 'put', '--raw');
+  my $pid = open2($child_out, $child_in, $arv_cli, 'keep', 'put', '--raw');
    my $joboutput;
    for (@jobstep)
    {
@@ -1236,7 +1245,7 @@ sub save_meta
    return if $justcheckpoint;  # checkpointing is not relevant post-Warehouse.pm
  
    $local_logfile->flush;
-  my $cmd = "arv keep put --filename ''\Q$keep_logfile\E "
+  my $cmd = "$arv_cli keep put --filename ''\Q$keep_logfile\E "
        . quotemeta($local_logfile->filename);
    my $loglocator = `$cmd`;
    die "system $cmd failed: $?" if $?;
diff --git a/services/api/app/controllers/arvados/v1/links_controller.rb b/services/api/app/controllers/arvados/v1/links_controller.rb

index 563804ef15d05df2c2187a56eaa1a80aa612573d..188ecfc1a04a78731697a55da0ad05a5d8706476 100644 (file)
--- a/services/api/app/controllers/arvados/v1/links_controller.rb
+++ b/services/api/app/controllers/arvados/v1/links_controller.rb
@@ -1,13 +1,17 @@
  class Arvados::V1::LinksController < ApplicationController
  
-  def create
-    if resource_attrs[:head_kind] and ArvadosModel::resource_class_for_uuid(resource_attrs[:head_uuid]).kind != resource_attrs[:head_kind]
-      errors.add(attr, "'#{resource_attrs[:head_kind]}' does not match '#{head_uuid}'")
+  def check_uuid_kind uuid, kind
+    if kind and ArvadosModel::resource_class_for_uuid(uuid).andand.kind != kind
+      render :json => { errors: ["'#{kind}' does not match uuid '#{uuid}', expected '#{ArvadosModel::resource_class_for_uuid(uuid).andand.kind}'"] }.to_json, status: 422
+      nil
+    else
+      true
      end
+  end
  
-    if resource_attrs[:tail_kind] and ArvadosModel::resource_class_for_uuid(resource_attrs[:tail_uuid]).kind != resource_attrs[:tail_kind]
-      errors.add(attr, "'#{resource_attrs[:tail_kind]}' does not match '#{tail_uuid}'")
-    end
+  def create
+    return if ! check_uuid_kind resource_attrs[:head_uuid], resource_attrs[:head_kind]
+    return if ! check_uuid_kind resource_attrs[:tail_uuid], resource_attrs[:tail_kind]
  
      resource_attrs.delete :head_kind
      resource_attrs.delete :tail_kind
diff --git a/services/api/test/functional/arvados/v1/links_controller_test.rb b/services/api/test/functional/arvados/v1/links_controller_test.rb

index 4726e0119a7c8a5e0f43d11f30857d5e613a1a7d..ac93c68277cddee6d5fc0550d5438d0af0d57990 100644 (file)
--- a/services/api/test/functional/arvados/v1/links_controller_test.rb
+++ b/services/api/test/functional/arvados/v1/links_controller_test.rb
@@ -240,4 +240,34 @@ class Arvados::V1::LinksControllerTest < ActionController::TestCase
      assert_response 422
    end
  
+  test "test with virtual_machine" do
+    link = {
+      tail_kind: "arvados#user",
+      tail_uuid: users(:active).uuid,
+      head_kind: "arvados#virtual_machine",
+      head_uuid: virtual_machines(:testvm).uuid,
+      link_class: "permission",
+      name: "can_login",
+      properties: {username: "repo_and_user_name"}
+    }
+    authorize_with :admin
+    post :create, link: link
+    assert_response 422
+  end
+
+  test "test with virtualMachine" do
+    link = {
+      tail_kind: "arvados#user",
+      tail_uuid: users(:active).uuid,
+      head_kind: "arvados#virtualMachine",
+      head_uuid: virtual_machines(:testvm).uuid,
+      link_class: "permission",
+      name: "can_login",
+      properties: {username: "repo_and_user_name"}
+    }
+    authorize_with :admin
+    post :create, link: link
+    assert_response :success
+  end
+
  end
diff --git a/services/keep/keep.go b/services/keep/keep.go

index 5113727169ffcd83a976d6074c402fe5be822fd1..74c3b15e4e2d1132cf62009ef0a483f3183927d7 100644 (file)
--- a/services/keep/keep.go
+++ b/services/keep/keep.go
@@ -4,6 +4,7 @@ import (
         "bufio"
         "bytes"
         "crypto/md5"
+       "encoding/json"
         "errors"
         "flag"
         "fmt"
@@ -13,6 +14,8 @@ import (
         "log"
         "net/http"
         "os"
+       "path/filepath"
+       "regexp"
         "strconv"
         "strings"
         "syscall"
@@ -85,29 +88,38 @@ func main() {
         //    by looking at currently mounted filesystems for /keep top-level
         //    directories.
  
-       var listen, keepvols string
+       var listen, volumearg string
         flag.StringVar(&listen, "listen", DEFAULT_ADDR,
                 "interface on which to listen for requests, in the format ipaddr:port. e.g. -listen=10.0.1.24:8000. Use -listen=:port to listen on all network interfaces.")
-       flag.StringVar(&keepvols, "volumes", "",
+       flag.StringVar(&volumearg, "volumes", "",
                 "Comma-separated list of directories to use for Keep volumes, e.g. -volumes=/var/keep1,/var/keep2. If empty or not supplied, Keep will scan mounted filesystems for volumes with a /keep top-level directory.")
         flag.Parse()
  
         // Look for local keep volumes.
-       if keepvols == "" {
+       var keepvols []string
+       if volumearg == "" {
                 // TODO(twp): decide whether this is desirable default behavior.
                 // In production we may want to require the admin to specify
                 // Keep volumes explicitly.
-               KeepVolumes = FindKeepVolumes()
+               keepvols = FindKeepVolumes()
         } else {
-               KeepVolumes = strings.Split(keepvols, ",")
+               keepvols = strings.Split(volumearg, ",")
+       }
+
+       // Check that the specified volumes actually exist.
+       KeepVolumes = []string(nil)
+       for _, v := range keepvols {
+               if _, err := os.Stat(v); err == nil {
+                       log.Println("adding Keep volume:", v)
+                       KeepVolumes = append(KeepVolumes, v)
+               } else {
+                       log.Printf("bad Keep volume: %s\n", err)
+               }
         }
  
         if len(KeepVolumes) == 0 {
                 log.Fatal("could not find any keep volumes")
         }
-       for _, v := range KeepVolumes {
-               log.Println("keep volume:", v)
-       }
  
         // Set up REST handlers.
         //
@@ -115,8 +127,11 @@ func main() {
         // appropriate handler.
         //
         rest := mux.NewRouter()
-       rest.HandleFunc("/{hash:[0-9a-f]{32}}", GetBlockHandler).Methods("GET")
-       rest.HandleFunc("/{hash:[0-9a-f]{32}}", PutBlockHandler).Methods("PUT")
+       rest.HandleFunc(`/{hash:[0-9a-f]{32}}`, GetBlockHandler).Methods("GET", "HEAD")
+       rest.HandleFunc(`/{hash:[0-9a-f]{32}}`, PutBlockHandler).Methods("PUT")
+       rest.HandleFunc(`/index`, IndexHandler).Methods("GET", "HEAD")
+       rest.HandleFunc(`/index/{prefix:[0-9a-f]{0,32}}`, IndexHandler).Methods("GET", "HEAD")
+       rest.HandleFunc(`/status.json`, StatusHandler).Methods("GET", "HEAD")
  
         // Tell the built-in HTTP server to direct all requests to the REST
         // router.
@@ -205,6 +220,146 @@ func PutBlockHandler(w http.ResponseWriter, req *http.Request) {
         }
  }
  
+// IndexHandler
+//     A HandleFunc to address /index and /index/{prefix} requests.
+//     Writes the IndexLocators listing for prefix ("" for plain /index).
+func IndexHandler(w http.ResponseWriter, req *http.Request) {
+       prefix := mux.Vars(req)["prefix"]
+
+       index := IndexLocators(prefix)
+       w.Write([]byte(index))
+}
+
+// VolumeStatus
+//     Describes one Keep volume in a status.json response:
+//            * mount_point
+//            * device_num (an integer identifying the underlying filesystem)
+//            * bytes_free
+//            * bytes_used
+type VolumeStatus struct {
+       MountPoint string `json:"mount_point"`
+       DeviceNum  uint64 `json:"device_num"`
+       BytesFree  uint64 `json:"bytes_free"`
+       BytesUsed  uint64 `json:"bytes_used"`
+}
+
+// NodeStatus
+//     The status.json payload: the Keep volumes in use by this server.
+type NodeStatus struct {
+       Volumes []*VolumeStatus `json:"volumes"`
+}
+
+// StatusHandler
+//     Responds to /status.json requests with the current node status,
+//     described in a JSON structure (see NodeStatus).  If marshalling
+//     fails, logs the error and replies with HTTP 500.
+func StatusHandler(w http.ResponseWriter, req *http.Request) {
+       st := GetNodeStatus()
+       if jstat, err := json.Marshal(st); err == nil {
+               w.Write(jstat)
+       } else {
+               log.Printf("json.Marshal: %s\n", err)
+               log.Printf("NodeStatus = %v\n", st)
+               http.Error(w, err.Error(), 500)
+       }
+}
+
+// GetNodeStatus
+//     Returns a NodeStatus struct describing this Keep node's current
+//     status: one VolumeStatus per entry in KeepVolumes, in the same
+//     order (an entry is nil if GetVolumeStatus failed for that volume).
+func GetNodeStatus() *NodeStatus {
+       st := new(NodeStatus)
+
+       st.Volumes = make([]*VolumeStatus, len(KeepVolumes))
+       for i, vol := range KeepVolumes {
+               st.Volumes[i] = GetVolumeStatus(vol)
+       }
+       return st
+}
+
+// GetVolumeStatus
+//     Returns a VolumeStatus describing the requested volume, or nil
+//     (after logging the error) if os.Stat or statfs fails on it.
+func GetVolumeStatus(volume string) *VolumeStatus {
+       var fs syscall.Statfs_t
+       var devnum uint64
+
+       if fi, err := os.Stat(volume); err == nil {
+               devnum = fi.Sys().(*syscall.Stat_t).Dev
+       } else {
+               log.Printf("GetVolumeStatus: os.Stat: %s\n", err)
+               return nil
+       }
+
+       err := syscall.Statfs(volume, &fs)
+       if err != nil {
+               log.Printf("GetVolumeStatus: statfs: %s\n", err)
+               return nil
+       }
+       // These calculations match the way df calculates disk usage:
+       // "free" space is measured by fs.Bavail, but "used" space
+       // uses fs.Blocks - fs.Bfree.
+       free := fs.Bavail * uint64(fs.Bsize)
+       used := (fs.Blocks - fs.Bfree) * uint64(fs.Bsize)
+       return &VolumeStatus{volume, devnum, free, used}
+}
+
+// IndexLocators
+//     Returns a string containing a list of locator ids found on this
+//     Keep server.  If {prefix} is given, return only those locator
+//     ids that begin with the given prefix string.
+//
+//     The return string consists of a sequence of newline-terminated
+//     lines in the format
+//
+//         locator+size modification-time
+//
+//     e.g.:
+//
+//         e4df392f86be161ca6ed3773a962b8f3+67108864 1388894303
+//         e4d41e6fd68460e0e3fc18cc746959d2+67108864 1377796043
+//         e4de7a2810f5554cd39b36d8ddb132ff+67108864 1388701136
+//
+func IndexLocators(prefix string) string {
+       var output string
+       for _, vol := range KeepVolumes {
+               filepath.Walk(vol,
+                       func(path string, info os.FileInfo, err error) error {
+                               // This WalkFunc inspects each path in the volume
+                               // and prints an index line for all files that begin
+                               // with prefix.
+                               if err != nil {
+                                       log.Printf("IndexHandler: %s: walking to %s: %s",
+                                               vol, path, err)
+                                       return nil
+                               }
+                               locator := filepath.Base(path)
+                               // Skip directories that do not match prefix -- but never
+                               // the volume root: its name is unrelated to any locator.
+                               if info.IsDir() && path != vol &&
+                                       !strings.HasPrefix(locator, prefix) &&
+                                       !strings.HasPrefix(prefix, locator) {
+                                       return filepath.SkipDir
+                               }
+                               // Skip any file that is not apparently a locator, e.g. .meta files
+                               if is_valid, err := IsValidLocator(locator); err != nil {
+                                       return err
+                               } else if !is_valid {
+                                       return nil
+                               }
+                               // Print filenames beginning with prefix
+                               if !info.IsDir() && strings.HasPrefix(locator, prefix) {
+                                       output = output + fmt.Sprintf(
+                                               "%s+%d %d\n", locator, info.Size(), info.ModTime().Unix())
+                               }
+                               return nil
+                       })
+       }
+
+       return output
+}
+
  func GetBlock(hash string) ([]byte, error) {
         var buf = make([]byte, BLOCKSIZE)
  
@@ -380,6 +535,10 @@ func IsFull(volume string) (isFull bool) {
  //     Returns the amount of available disk space on VOLUME,
  //     as a number of 1k blocks.
  //
+//     TODO(twp): consider integrating this better with
+//     VolumeStatus (e.g. keep a NodeStatus object up-to-date
+//     periodically and use it as the source of info)
+//
  func FreeDiskSpace(volume string) (free uint64, err error) {
         var fs syscall.Statfs_t
         err = syscall.Statfs(volume, &fs)
@@ -388,7 +547,6 @@ func FreeDiskSpace(volume string) (free uint64, err error) {
                 // space in terms of 1K blocks.
                 free = fs.Bavail * uint64(fs.Bsize) / 1024
         }
-
         return
  }
  
@@ -409,3 +567,12 @@ func ReadAtMost(r io.Reader, maxbytes int) ([]byte, error) {
         }
         return buf, err
  }
+
+// IsValidLocator
+//     Return true if the specified string is a valid Keep locator,
+//     i.e. exactly 32 lowercase hex digits (an MD5 hash).  When Keep
+//     supports other hash types, update this to cover those as well.
+//
+func IsValidLocator(loc string) (bool, error) {
+       return regexp.MatchString(`^[0-9a-f]{32}$`, loc)
+}
diff --git a/services/keep/keep_test.go b/services/keep/keep_test.go

index 348445e78d105a79a136ea010c6139aad9ae4beb..97fa1c78919e3070bf02690dbb876a741837dda7 100644 (file)
--- a/services/keep/keep_test.go
+++ b/services/keep/keep_test.go
@@ -6,20 +6,30 @@ import (
         "io/ioutil"
         "os"
         "path"
+       "regexp"
         "testing"
  )
  
  var TEST_BLOCK = []byte("The quick brown fox jumps over the lazy dog.")
  var TEST_HASH = "e4d909c290d0fb1ca068ffaddf22cbd0"
+
+var TEST_BLOCK_2 = []byte("Pack my box with five dozen liquor jugs.")
+var TEST_HASH_2 = "f15ac516f788aec4f30932ffb6395c39"
+
+var TEST_BLOCK_3 = []byte("Now is the time for all good men to come to the aid of their country.")
+var TEST_HASH_3 = "eed29bbffbc2dbe5e5ee0bb71888e61f"
+
+// BAD_BLOCK is used to test collisions and corruption.
+// It must not match any test hashes.
  var BAD_BLOCK = []byte("The magic words are squeamish ossifrage.")
  
  // TODO(twp): Tests still to be written
  //
-//   * PutBlockFull
+//   * TestPutBlockFull
  //       - test that PutBlock returns 503 Full if the filesystem is full.
  //         (must mock FreeDiskSpace or Statfs? use a tmpfs?)
  //
-//   * PutBlockWriteErr
+//   * TestPutBlockWriteErr
  //       - test the behavior when Write returns an error.
  //           - Possible solutions: use a small tmpfs and a high
  //             MIN_FREE_KILOBYTES to trick PutBlock into attempting
@@ -280,6 +290,68 @@ func TestFindKeepVolumesFail(t *testing.T) {
         }
  }
  
+// TestIndex
+//     Test an /index request.
+func TestIndex(t *testing.T) {
+       defer teardown()
+
+       // Set up Keep volumes and populate them.
+       // Include multiple blocks on different volumes, and
+       // some metadata files.
+       KeepVolumes = setup(t, 2)
+       store(t, KeepVolumes[0], TEST_HASH, TEST_BLOCK)
+       store(t, KeepVolumes[1], TEST_HASH_2, TEST_BLOCK_2)
+       store(t, KeepVolumes[0], TEST_HASH_3, TEST_BLOCK_3)
+       store(t, KeepVolumes[0], TEST_HASH+".meta", []byte("metadata"))
+       store(t, KeepVolumes[1], TEST_HASH_2+".meta", []byte("metadata"))
+
+       index := IndexLocators("")
+       expected := `^` + TEST_HASH + `\+\d+ \d+\n` +
+               TEST_HASH_3 + `\+\d+ \d+\n` +
+               TEST_HASH_2 + `\+\d+ \d+\n$`
+
+       match, err := regexp.MatchString(expected, index)
+       if err == nil {
+               if !match {
+                       t.Errorf("IndexLocators returned:\n-----\n%s-----\n", index)
+               }
+       } else {
+               t.Errorf("regexp.MatchString: %s", err)
+       }
+}
+
+// TestNodeStatus
+//     Test that GetNodeStatus returns valid info about available volumes.
+//
+//     TODO(twp): set up appropriate interfaces to permit more rigorous
+//     testing.
+//
+func TestNodeStatus(t *testing.T) {
+       defer teardown()
+
+       // Set up test Keep volumes.
+       KeepVolumes = setup(t, 2)
+
+       // Get node status and make a basic sanity check.
+       st := GetNodeStatus()
+       for i, vol := range KeepVolumes {
+               volinfo := st.Volumes[i]
+               mtp := volinfo.MountPoint
+               if mtp != vol {
+                       t.Errorf("GetNodeStatus mount_point %s != KeepVolume %s", mtp, vol)
+               }
+               if volinfo.DeviceNum == 0 {
+                       t.Errorf("uninitialized device_num in %v", volinfo)
+               }
+               if volinfo.BytesFree == 0 {
+                       t.Errorf("uninitialized bytes_free in %v", volinfo)
+               }
+               if volinfo.BytesUsed == 0 {
+                       t.Errorf("uninitialized bytes_used in %v", volinfo)
+               }
+       }
+}
+
  // ========================================
  // Helper functions for unit tests.
  // ========================================
author	Tom Clegg <tom@curoverse.com>
	Thu, 17 Apr 2014 00:38:28 +0000 (20:38 -0400)
committer	Tom Clegg <tom@curoverse.com>
	Thu, 17 Apr 2014 00:38:28 +0000 (20:38 -0400)
apps/workbench/app/assets/javascripts/selection.js		patch \| blob \| history
apps/workbench/app/controllers/collections_controller.rb		patch \| blob \| history
apps/workbench/app/helpers/application_helper.rb		patch \| blob \| history
apps/workbench/app/views/application/_selection_checkbox.html.erb		patch \| blob \| history
apps/workbench/app/views/collections/_index_tbody.html.erb		patch \| blob \| history
apps/workbench/app/views/collections/_show_files.html.erb		patch \| blob \| history
apps/workbench/app/views/layouts/application.html.erb		patch \| blob \| history
apps/workbench/test/integration/users_test.rb		patch \| blob \| history
doc/_config.yml		patch \| blob \| history
doc/api/methods/jobs.html.textile.liquid		patch \| blob \| history
doc/api/schema/Job.html.textile.liquid		patch \| blob \| history
doc/api/schema/PipelineTemplate.html.textile.liquid		patch \| blob \| history
doc/user/reference/job-and-pipeline-reference.html.textile.liquid	[deleted file]	patch \| blob \| history
doc/user/tutorials/tutorial-firstscript.html.textile.liquid		patch \| blob \| history
doc/user/tutorials/tutorial-new-pipeline.html.textile.liquid		patch \| blob \| history
sdk/cli/bin/crunch-job		patch \| blob \| history
services/api/app/controllers/arvados/v1/links_controller.rb		patch \| blob \| history
services/api/test/functional/arvados/v1/links_controller_test.rb		patch \| blob \| history
services/keep/keep.go		patch \| blob \| history
services/keep/keep_test.go		patch \| blob \| history