Merge branch 'master' into 2525-java-sdk
author radhika <radhika@curoverse.com>
Wed, 7 May 2014 16:11:22 +0000 (12:11 -0400)
committer radhika <radhika@curoverse.com>
Wed, 7 May 2014 16:11:22 +0000 (12:11 -0400)
34 files changed:
apps/workbench/test/test_helper.rb
doc/_config.yml
doc/api/methods.html.textile.liquid
doc/api/methods/logs.html.textile.liquid
doc/api/schema/Job.html.textile.liquid
doc/install/index.html.md.liquid [deleted file]
doc/install/index.html.textile.liquid [new file with mode: 0644]
doc/install/install-api-server.html.textile.liquid
doc/install/install-crunch-dispatch.html.textile.liquid
doc/install/install-sso.html.textile.liquid
doc/install/install-workbench-app.html.textile.liquid
doc/sdk/perl/index.html.textile.liquid
doc/sdk/python/sdk-python.html.textile.liquid
docker/build_tools/Makefile
docker/jobs/Dockerfile [new file with mode: 0644]
sdk/cli/bin/crunch-job
sdk/python/.gitignore
sdk/python/arvados/events.py [new file with mode: 0644]
sdk/python/arvados/fuse.py [deleted file]
sdk/python/arvados/fuse/__init__.py [new file with mode: 0644]
sdk/python/bin/arv-mount
sdk/python/build.sh [deleted file]
sdk/python/requirements.txt
sdk/python/run_test_server.py [new file with mode: 0644]
sdk/python/setup.py [new file with mode: 0644]
sdk/python/setup.py.src
sdk/python/setup_fuse.py [new file with mode: 0644]
sdk/python/setup_header.py [new file with mode: 0644]
sdk/python/test_mount.py
sdk/python/test_websockets.py [new file with mode: 0644]
services/api/app/controllers/arvados/v1/schema_controller.rb
services/api/lib/record_filters.rb
services/api/script/import_commits.rb [deleted file]
services/api/test/functional/arvados/v1/jobs_controller_test.rb

diff --git a/apps/workbench/test/test_helper.rb b/apps/workbench/test/test_helper.rb
index 797551e7fb5c0a3d49e3dba26bdf697d6ba8756a..05be43cb5fe9fe7daeb1a224238f5411bc0c8b40 100644 (file)
@@ -74,16 +74,11 @@ class ActiveSupport::TestCase
 end
 
 class ApiServerBackedTestRunner < MiniTest::Unit
-  # Make a hash that unsets Bundle's environment variables.
-  # We'll use this environment when we launch Bundle commands in the API
-  # server.  Otherwise, those commands will try to use Workbench's gems, etc.
-  @@APIENV = Hash[ENV.map { |key, val|
-                    (key =~ /^BUNDLE_/) ? [key, nil] : nil
-                  }.compact]
-
   def _system(*cmd)
-    if not system(@@APIENV, *cmd)
-      raise RuntimeError, "#{cmd[0]} returned exit code #{$?.exitstatus}"
+    Bundler.with_clean_env do
+      if not system({'RAILS_ENV' => 'test'}, *cmd)
+        raise RuntimeError, "#{cmd[0]} returned exit code #{$?.exitstatus}"
+      end
     end
   end
 
@@ -95,15 +90,16 @@ class ApiServerBackedTestRunner < MiniTest::Unit
       _system('bundle', 'exec', 'rake', 'db:fixtures:load')
       _system('bundle', 'exec', 'rails', 'server', '-d')
       timeout = Time.now.tv_sec + 10
-      begin
+      good_pid = false
+      while (not good_pid) and (Time.now.tv_sec < timeout)
         sleep 0.2
         begin
           server_pid = IO.read(SERVER_PID_PATH).to_i
-          good_pid = (server_pid > 0) and (Process.kill(0, pid) rescue false)
+          good_pid = (server_pid > 0) and (Process.kill(0, server_pid) rescue false)
         rescue Errno::ENOENT
           good_pid = false
         end
-      end while (not good_pid) and (Time.now.tv_sec < timeout)
+      end
       if not good_pid
         raise RuntimeError, "could not find API server Rails pid"
       end
diff --git a/doc/_config.yml b/doc/_config.yml
index 9fc097b1b84aee197dd99b90386fe62b453fa0f6..72fcb7d1e61b7140055a29a0706de48d83037b15 100644 (file)
@@ -114,10 +114,9 @@ navbar:
       - admin/cheat_sheet.html.textile.liquid
   installguide:
     - Install:
-      - install/index.html.md.liquid
+      - install/index.html.textile.liquid
       - install/install-sso.html.textile.liquid
       - install/install-api-server.html.textile.liquid
       - install/install-workbench-app.html.textile.liquid
-      - install/client.html.textile.liquid
       - install/create-standard-objects.html.textile.liquid
       - install/install-crunch-dispatch.html.textile.liquid
diff --git a/doc/api/methods.html.textile.liquid b/doc/api/methods.html.textile.liquid
index 57d058e157b7c113f529ce5398f380965b201842..da15df8b92662d86ee8b9c42de62f01e871095da 100644 (file)
@@ -24,12 +24,39 @@ filters=[["owner_uuid","=","xyzzy-tpzed-a4lcehql0dv2u25"]]
 
 table(table table-bordered table-condensed).
 |*Parameter name*|*Value*|*Description*|
-|limit   |integer|Maximum number of resources to return|
-|offset  |integer|Skip the first 'offset' objects|
-|filters |array  |Conditions for selecting resources to return|
-|order   |array  |List of fields to use to determine sorting order for returned objects|
-|select  |array  |Specify which fields to return|
-|distinct|boolean|true: (default) do not return duplicate objects<br> false: permitted to return duplicates|
+|limit   |integer|Maximum number of resources to return.|
+|offset  |integer|Skip the first 'offset' resources that match the given filter conditions.|
+|filters |array  |Conditions for selecting resources to return (see below).|
+|order   |array  |Attributes to use as sort keys to determine the order resources are returned, each optionally followed by @asc@ or @desc@ to indicate ascending or descending order.
+Example: @["head_uuid asc","modified_at desc"]@
+Default: @["created_at desc"]@|
+|select  |array  |Set of attributes to include in the response.
+Example: @["head_uuid","tail_uuid"]@
+Default: all available attributes, minus "manifest_text" in the case of collections.|
+|distinct|boolean|@true@: (default) do not return duplicate objects
+@false@: permitted to return duplicates|
+
+h3. Filters
+
+The value of the @filters@ parameter is an array of conditions. The @list@ method returns only the resources that satisfy all of the given conditions. In other words, the conjunction @AND@ is implicit.
+
+Each condition is expressed as an array with three elements: @[attribute, operator, operand]@.
+
+table(table table-bordered table-condensed).
+|_. Index|_. Element|_. Type|_. Description|_. Examples|
+|0|attribute|string|Name of the attribute to compare|@script_version@, @head_uuid@|
+|1|operator|string|Comparison operator|@>@, @>=@, @like@, @not in@|
+|2|operand|string, array, or null|Value to compare with the resource attribute|@"d00220fb%"@, @"1234"@, @["foo","bar"]@, @nil@|
+
+The following operators are available.
+
+table(table table-bordered table-condensed).
+|_. Operator|_. Operand type|_. Example|
+|@<@, @<=@, @>=@, @>@, @like@|string|@["script_version","like","d00220fb%"]@|
+|@=@, @!=@|string or null|@["tail_uuid","=","xyzzy-j7d0g-fffffffffffffff"]@
+@["tail_uuid","!=",null]@|
+|@in@, @not in@|array of strings|@["script_version","in",["master","d00220fb38d4b85ca8fc28a8151702a2b9d1dec5"]]@|
+|@is_a@|string|@["head_uuid","is_a","arvados#pipelineInstance"]@|
 
 h2. Create
 
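
As a worked example of the filters, order, and limit parameters documented above, here is a minimal sketch using the Python SDK. It assumes ARVADOS_API_HOST and ARVADOS_API_TOKEN are already configured in the environment, and that the generated client accepts these query parameters as keyword arguments.

<pre>
import arvados

api = arvados.api('v1')
# Return up to 10 jobs whose script_version starts with "d00220fb", newest first.
result = api.jobs().list(
    filters=[["script_version", "like", "d00220fb%"]],
    order=["created_at desc"],
    limit=10).execute()
for job in result['items']:
    print job['uuid'], job['created_at']
</pre>
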
diff --git a/doc/api/methods/logs.html.textile.liquid b/doc/api/methods/logs.html.textile.liquid
index 6d91322f2d3b31c9fad2ae1b0dd9b3936363389c..c5895d78a211dab8170a4b21ffcf66b6c574e33f 100644 (file)
@@ -15,7 +15,7 @@ Required arguments are displayed in %{background:#ccffcc}green%.
 
 h2. create
 
-Create a new Log.
+Create a new log entry.
 
 Arguments:
 
@@ -25,43 +25,43 @@ table(table table-bordered table-condensed).
 
 h2. delete
 
-Delete an existing Log.
+Delete an existing log entry. This method can only be used by privileged (system administrator) users.
 
 Arguments:
 
 table(table table-bordered table-condensed).
 |_. Argument |_. Type |_. Description |_. Location |_. Example |
-{background:#ccffcc}.|uuid|string|The UUID of the Log in question.|path||
+{background:#ccffcc}.|uuid|string|The UUID of the log entry in question.|path||
 
 h2. get
 
-Gets a Log's metadata by UUID.
+Retrieve a log entry.
 
 Arguments:
 
 table(table table-bordered table-condensed).
 |_. Argument |_. Type |_. Description |_. Location |_. Example |
-{background:#ccffcc}.|uuid|string|The UUID of the Log in question.|path||
+{background:#ccffcc}.|uuid|string|The UUID of the log entry in question.|path||
 
 h2. list
 
-List logs.
+List log entries.
 
 Arguments:
 
 table(table table-bordered table-condensed).
 |_. Argument |_. Type |_. Description |_. Location |_. Example |
-|limit|integer (default 100)|Maximum number of logs to return.|query||
-|order|string|Order in which to return matching logs.|query||
-|filters|array|Conditions for filtering logs.|query||
+|limit|integer (default 100)|Maximum number of log entries to return.|query||
+|order|string|Order in which to return matching log entries.|query||
+|filters|array|Conditions for filtering log entries.|query||
 
 h2. update
 
-Update attributes of an existing Log.
+Update attributes of an existing log entry. This method can only be used by privileged (system administrator) users.
 
 Arguments:
 
 table(table table-bordered table-condensed).
 |_. Argument |_. Type |_. Description |_. Location |_. Example |
-{background:#ccffcc}.|uuid|string|The UUID of the Log in question.|path||
+{background:#ccffcc}.|uuid|string|The UUID of the log entry in question.|path||
 |log|object||query||
diff --git a/doc/api/schema/Job.html.textile.liquid b/doc/api/schema/Job.html.textile.liquid
index 82f9242db362e5a7408dc4c132370f7f04094b8b..097d0df898bea9d79acd666478d94e59c376de02 100644 (file)
@@ -50,6 +50,7 @@ h3. Runtime constraints
 
 table(table table-bordered table-condensed).
 |_. Key|_. Type|_. Description|_. Implemented|
+|docker_image|string|The name of a Docker image that this Job needs to run.  If specified, Crunch will create a Docker container from this image, and run the Job's script inside that.  The Keep mount and work directories will be available as volumes inside this container.  You may specify the image in any format that Docker accepts, such as "arvados/jobs" or a hash identifier.  If you specify a name, Crunch will try to install the latest version using @docker.io pull@.|&#10003;|
 |min_nodes|integer||&#10003;|
 |max_nodes|integer|||
 |max_tasks_per_node|integer|Maximum simultaneous tasks on a single node|&#10003;|
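
To illustrate the new docker_image runtime constraint documented above, a job record might look like the sketch below. The script name, repository, and input hash are placeholders reused from other examples in this documentation; the resulting JSON can be passed to arv job create --job (as shown in the Crunch dispatcher install page) or to the equivalent API call.

<pre>
import json

# Hypothetical job submission using the arvados/jobs image built from docker/jobs/Dockerfile.
job = {
    "script": "hash",
    "script_version": "master",
    "repository": "arvados",
    "script_parameters": {"input": "f815ec01d5d2f11cb12874ab2ed50daa"},
    "runtime_constraints": {"docker_image": "arvados/jobs"},
}
print json.dumps(job, indent=2)
</pre>
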
diff --git a/doc/install/index.html.md.liquid b/doc/install/index.html.md.liquid
deleted file mode 100644 (file)
index aaac7a7..0000000
+++ /dev/null
@@ -1,18 +0,0 @@
----
-layout: default
-navsection: installguide
-title: Overview
-...
-
-{% include 'alert_stub' %}
-
-# Installation Overview
-
-1. Set up a cluster, or use Amazon
-1. Create and mount Keep volumes
-1. [Install the Single Sign On (SSO) server](install-sso.html)
-1. [Install the Arvados REST API server](install-api-server.html)
-1. [Install the Arvados workbench application](install-workbench-app.html)
-1. [Install the Crunch dispatcher](install-crunch-dispatch.html)
-1. [Create standard objects](create-standard-objects.html)
-1. [Install client libraries](client.html)
diff --git a/doc/install/index.html.textile.liquid b/doc/install/index.html.textile.liquid
new file mode 100644 (file)
index 0000000..5bf35f3
--- /dev/null
@@ -0,0 +1,18 @@
+---
+layout: default
+navsection: installguide
+title: Overview
+...
+
+{% include 'alert_stub' %}
+
+h2. Installation Overview
+
+# Set up a cluster, or use Amazon
+# Create and mount Keep volumes
+# "Install the Single Sign On (SSO) server":install-sso.html
+# "Install the Arvados REST API server":install-api-server.html
+# "Install the Arvados workbench application":install-workbench-app.html
+# "Install the Crunch dispatcher":install-crunch-dispatch.html
+# "Create standard objects":create-standard-objects.html
+# Install client libraries (see "SDK Reference":{{site.baseurl}}/sdk/index.html).
diff --git a/doc/install/install-api-server.html.textile.liquid b/doc/install/install-api-server.html.textile.liquid
index a1cca3d8b4466b48785dd69abf94f2f798fdf878..0d721fcb0930a6f606e73af57c7fed868a19ddd8 100644 (file)
@@ -37,7 +37,8 @@ sudo gem install bundler</span>
 h2. Download the source tree
 
 <notextile>
-<pre><code>~$ <span class="userinput">git clone https://github.com/curoverse/arvados.git</span>
+<pre><code>~$ <span class="userinput">cd $HOME</span> # (or wherever you want to install)
+~$ <span class="userinput">git clone https://github.com/curoverse/arvados.git</span>
 </code></pre></notextile>
 
 See also: "Downloading the source code":https://arvados.org/projects/arvados/wiki/Download on the Arvados wiki.
@@ -140,7 +141,7 @@ To enable streaming so users can monitor crunch jobs in real time, add to your P
 </code></pre>
 </notextile>
 
-h2. Add an admin user
+h2(#admin-user). Add an admin user
 
 Point your browser to the API server's login endpoint:
 
diff --git a/doc/install/install-crunch-dispatch.html.textile.liquid b/doc/install/install-crunch-dispatch.html.textile.liquid
index 9cdebbc082f4e3cc7c613bcd7828669cd1842b23..d0f4414b6e66c2dabcd8cfb12498033b7254d1ec 100644 (file)
@@ -11,22 +11,15 @@ The dispatcher normally runs on the same host/VM as the API server.
 
 h4. Perl SDK dependencies
 
-* @apt-get install libjson-perl libwww-perl libio-socket-ssl-perl libipc-system-simple-perl@
+Install the Perl SDK on the controller.
 
-Add this to @/etc/apt/sources.list@
-
-@deb http://git.oxf.freelogy.org/apt wheezy main contrib@
-
-Then
-
-@apt-get install libwarehouse-perl@
+* See "Perl SDK":{{site.baseurl}}/sdk/perl/index.html page for details.
 
 h4. Python SDK dependencies
 
-On controller and all compute nodes:
+Install the Python SDK and CLI tools on controller and all compute nodes.
 
-* @apt-get install python-pip@
-* @pip install --upgrade virtualenv arvados-python-client@
+* See "Python SDK":{{site.baseurl}}/sdk/python/sdk-python.html page for details.
 
 h4. Likely crunch job dependencies
 
@@ -50,29 +43,16 @@ The crunch user should have the same UID, GID, and home directory on all compute
 
 h4. Repositories
 
-Crunch scripts must be in Git repositories in @/var/cache/git/*/.git@ (or whatever is configured in @services/api/config/environments/production.rb@).
-
-h4. Importing commits
-
-@services/api/script/import_commits.rb production@ must run periodically. Example @/var/service/arvados_import_commits/run@ script for daemontools or runit:
-
-<pre>
-#!/bin/sh
-set -e
-while sleep 60
-do
-  cd /path/to/arvados/services/api
-  setuidgid www-data env RAILS_ENV=production /usr/local/rvm/bin/rvm-exec 2.0.0 bundle exec ./script/import_commits.rb 2>&1
-done
-</pre>
+Crunch scripts must be in Git repositories in @/var/lib/arvados/git/*.git@ (or whatever is configured in @services/api/config/environments/production.rb@).
 
-Once you have imported some commits, you should be able to create a new job:
+Once you have a repository with commits -- and you have read access to the repository -- you should be able to create a new job:
 
 <pre>
 read -rd $'\000' newjob <<EOF; arv job create --job "$newjob"
 {"script_parameters":{"input":"f815ec01d5d2f11cb12874ab2ed50daa"},
  "script_version":"master",
- "script":"hash"}
+ "script":"hash",
+ "repository":"arvados"}
 EOF
 </pre>
 
@@ -94,8 +74,12 @@ Example @/var/service/arvados_crunch_dispatch/run@ script:
 <pre>
 #!/bin/sh
 set -e
+
+rvmexec=""
+## uncomment this line if you use rvm:
+#rvmexec="/usr/local/rvm/bin/rvm-exec 2.1.1"
+
 export PATH="$PATH":/path/to/arvados/services/crunch
-export PERLLIB=/path/to/arvados/sdk/perl/lib:/path/to/warehouse-apps/libwarehouse-perl/lib
 export ARVADOS_API_HOST={{ site.arvados_api_host }}
 export CRUNCH_DISPATCH_LOCKFILE=/var/lock/crunch-dispatch
 
@@ -106,5 +90,5 @@ fuser -TERM -k $CRUNCH_DISPATCH_LOCKFILE || true
 
 cd /path/to/arvados/services/api
 export RAILS_ENV=production
-exec /usr/local/rvm/bin/rvm-exec 2.0.0 bundle exec ./script/crunch-dispatch.rb 2>&1
+exec $rvmexec bundle exec ./script/crunch-dispatch.rb 2>&1
 </pre>
diff --git a/doc/install/install-sso.html.textile.liquid b/doc/install/install-sso.html.textile.liquid
index f220ef6d369c33ccaae266bfb416e44a550a9f9f..2f2ba5151b33a1c4103833c8d02187121917e60d 100644 (file)
@@ -5,7 +5,8 @@ title: Install Single Sign On (SSO) server
 ...
 
 <notextile>
-<pre><code>~$ <span class="userinput">git clone https://github.com/curoverse/sso-devise-omniauth-provider.git</span>
+<pre><code>~$ <span class="userinput">cd $HOME</span> # (or wherever you want to install)
+~$ <span class="userinput">git clone https://github.com/curoverse/sso-devise-omniauth-provider.git</span>
 ~$ <span class="userinput">cd sso-devise-omniauth-provider</span>
 ~/sso-devise-omniauth-provider$ <span class="userinput">bundle install</span>
 ~/sso-devise-omniauth-provider$ <span class="userinput">rake db:create</span>
diff --git a/doc/install/install-workbench-app.html.textile.liquid b/doc/install/install-workbench-app.html.textile.liquid
index c9395c6ce736f1c51e9d84c47adee56766e0c28c..eaf4edecdd8b2fe8c4f1fe28cffa5a205d9e7daa 100644 (file)
@@ -25,7 +25,12 @@ Install graphviz.
 
 h2. Download the source tree
 
-Please follow the instructions on the "Download page":https://arvados.org/projects/arvados/wiki/Download in the wiki.
+<notextile>
+<pre><code>~$ <span class="userinput">cd $HOME</span> # (or wherever you want to install)
+~$ <span class="userinput">git clone https://github.com/curoverse/arvados.git</span>
+</code></pre></notextile>
+
+See also: "Downloading the source code":https://arvados.org/projects/arvados/wiki/Download on the Arvados wiki.
 
 The Workbench application is in @apps/workbench@ in the source tree.
 
@@ -59,21 +64,28 @@ Copy @config/application.yml.example@ to @config/application.yml@ and edit it ap
 * Set @secret_token@ to the string you generated with @rake secret@.
 * Point @arvados_login_base@ and @arvados_v1_base@ at your "API server":install-api-server.html
 * @site_name@ can be any string to identify this Workbench.
-* Assuming that the SSL certificate you use for development isn't signed by a CA, make sure @arvados_insecure_https@ is @true@.
+* If the SSL certificate you use for development isn't signed by a CA, make sure @arvados_insecure_https@ is @true@.
 
 Copy @config/piwik.yml.example@ to @config/piwik.yml@ and edit to suit.
 
-h3. Apache/Passenger (optional)
+h2. Start a standalone server
 
-Set up Apache and Passenger. Point them to the apps/workbench directory in the source tree.
+For testing and development, the easiest way to get started is to run the web server that comes with Rails.
+
+<notextile>
+<pre><code>~/arvados/apps/workbench$ <span class="userinput">bundle exec rails server --port=3031</span>
+</code></pre>
+</notextile>
+
+Point your browser to <notextile><code>http://<b>your.host</b>:3031/</code></notextile>.
 
 h2. Trusted client setting
 
-Log in to Workbench once (this ensures that the Arvados API server has a record of the Workbench client).
+Log in to Workbench once to ensure that the Arvados API server has a record of the Workbench client. (It's OK if Workbench says your account hasn't been activated yet. We'll deal with that next.)
 
 In the API server project root, start the rails console.  Locate the ApiClient record for your Workbench installation (typically, while you're setting this up, the @last@ one in the database is the one you want), then set the @is_trusted@ flag for the appropriate client record:
 
-<notextile><pre><code>~/arvados/services/api$ <span class="userinput">RAILS_ENV=development bundle exec rails console</span>
+<notextile><pre><code>~/arvados/services/api$ <span class="userinput">bundle exec rails console</span>
 irb(main):001:0&gt; <span class="userinput">wb = ApiClient.all.last; [wb.url_prefix, wb.created_at]</span>
 =&gt; ["https://workbench.example.com/", Sat, 19 Apr 2014 03:35:12 UTC +00:00]
 irb(main):002:0&gt; <span class="userinput">include CurrentApiClient</span>
@@ -82,3 +94,9 @@ irb(main):003:0&gt; <span class="userinput">act_as_system_user do wb.update_attr
 =&gt; true
 </code></pre>
 </notextile>
+
+h2. Activate your own account
+
+Unless you already activated your account when installing the API server, the first time you log in to Workbench you will see a message that your account is awaiting activation.
+
+Activate your own account and give yourself administrator privileges by following the instructions in the "'Add an admin user' section of the API server install page":install-api-server.html#admin-user.
diff --git a/doc/sdk/perl/index.html.textile.liquid b/doc/sdk/perl/index.html.textile.liquid
index 288bc31e05fd0952341156d213dba2c7e22d3a7d..448cbb1ede54814a6db5285a9ffc66b92e4e2cb8 100644 (file)
@@ -17,7 +17,7 @@ h3. Installation
 
 <notextile>
 <pre>
-$ <code class="userinput">sudo apt-get install libjson-perl libio-socket-ssl-perl libwww-perl</code>
+$ <code class="userinput">sudo apt-get install libjson-perl libio-socket-ssl-perl libwww-perl libipc-system-simple-perl</code>
 $ <code class="userinput">git clone https://github.com/curoverse/arvados.git</code>
 $ <code class="userinput">cd arvados/sdk/perl</code>
 $ <code class="userinput">perl Makefile.PL</code>
diff --git a/doc/sdk/python/sdk-python.html.textile.liquid b/doc/sdk/python/sdk-python.html.textile.liquid
index d563d6e6ae13f31ccdf9cf2b5279b45e05aa9056..eef9a246c1b93af0d3914ba0aff00908bc34bb22 100644 (file)
@@ -41,11 +41,11 @@ h4. Option 2: build and install from source
 
 <notextile>
 <pre>
-$ <code class="userinput">sudo apt-get install python-dev libattr1-dev libfuse-dev pkg-config</code>
-$ <code class="userinput">git clone https://github.com/curoverse/arvados.git</code>
-$ <code class="userinput">cd arvados/sdk/python</code>
-$ <code class="userinput">./build.sh</code>
-$ <code class="userinput">sudo python setup.py install</code>
+~$ <code class="userinput">sudo apt-get install python-dev libattr1-dev libfuse-dev pkg-config</code>
+~$ <code class="userinput">git clone https://github.com/curoverse/arvados.git</code>
+~$ <code class="userinput">cd arvados/sdk/python</code>
+~/arvados/sdk/python$ <code class="userinput">./build.sh</code>
+~/arvados/sdk/python$ <code class="userinput">sudo python setup.py install</code>
 </pre>
 </notextile>
 
diff --git a/docker/build_tools/Makefile b/docker/build_tools/Makefile
index 9eac2ec61cbe4b6462f98f2ecf5589de17d4f2cf..69db746326767729c5eacaa728db905400a7929a 100644 (file)
@@ -24,6 +24,8 @@ BUILD = build/.buildstamp
 
 BASE_DEPS = base/Dockerfile $(BASE_GENERATED)
 
+JOBS_DEPS = jobs/Dockerfile
+
 API_DEPS = api/Dockerfile $(API_GENERATED)
 
 DOC_DEPS = doc/Dockerfile doc/apache2_vhost
@@ -86,6 +88,10 @@ $(BUILD):
        mkdir -p build
        rsync -rlp --exclude=docker/ --exclude='**/log/*' --exclude='**/tmp/*' \
                --chmod=Da+rx,Fa+rX ../ build/
+       find build/ -name \*.gem -delete
+       cd build/sdk/python/ && ./build.sh
+       cd build/sdk/cli && gem build arvados-cli.gemspec
+       cd build/sdk/ruby && gem build arvados.gemspec
        touch build/.buildstamp
 
 $(BASE_GENERATED): config.yml $(BUILD)
@@ -125,6 +131,10 @@ doc-image: base-image $(BUILD) $(DOC_DEPS)
        $(DOCKER_BUILD) -t arvados/doc doc
        date >doc-image
 
+jobs-image: base-image $(BUILD) $(JOBS_DEPS)
+       $(DOCKER_BUILD) -t arvados/jobs jobs
+       date >jobs-image
+
 workbench-image: passenger-image $(BUILD) $(WORKBENCH_DEPS)
        mkdir -p workbench/generated
        tar -czf workbench/generated/workbench.tar.gz -C build/apps workbench
diff --git a/docker/jobs/Dockerfile b/docker/jobs/Dockerfile
new file mode 100644 (file)
index 0000000..28ef858
--- /dev/null
@@ -0,0 +1,20 @@
+FROM arvados/base
+MAINTAINER Brett Smith <brett@curoverse.com>
+
+# Install dependencies and set up system.
+# The FUSE packages help ensure that we can install the Python SDK (arv-mount).
+RUN /usr/bin/apt-get install -q -y python-dev python-llfuse python-pip \
+      libio-socket-ssl-perl libjson-perl liburi-perl libwww-perl \
+      fuse libattr1-dev libfuse-dev && \
+    /usr/sbin/adduser --disabled-password \
+      --gecos 'Crunch execution user' crunch && \
+    /usr/bin/install -d -o crunch -g crunch -m 0700 /tmp/crunch-job && \
+    /bin/ln -s /usr/src/arvados /usr/local/src/arvados
+
+# Install Arvados packages.
+RUN find /usr/src/arvados/sdk -name '*.gem' -print0 | \
+      xargs -0rn 1 gem install && \
+    cd /usr/src/arvados/sdk/python && \
+    python setup.py install
+
+USER crunch
diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index 48a6c9dea7f7f5be8fa20367e46e29de969f5b62..14b1be9a99dabc11e0c82bfc1be3755885dddddc 100755 (executable)
@@ -498,7 +498,30 @@ if (!$have_slurm)
   must_lock_now("$ENV{CRUNCH_TMP}/.lock", "a job is already running here.");
 }
 
-
+# If this job requires a Docker image, install that.
+my $docker_bin = "/usr/bin/docker.io";
+my $docker_image = $Job->{runtime_constraints}->{docker_image} || "";
+if ($docker_image) {
+  my $docker_pid = fork();
+  if ($docker_pid == 0)
+  {
+    srun (["srun", "--nodelist=" . join(' ', @node)],
+          [$docker_bin, 'pull', $docker_image]);
+    exit ($?);
+  }
+  while (1)
+  {
+    last if $docker_pid == waitpid (-1, WNOHANG);
+    freeze_if_want_freeze ($docker_pid);
+    select (undef, undef, undef, 0.1);
+  }
+  # If the Docker image was specified as a hash, pull will fail.
+  # Ignore that error.  We'll see what happens when we try to run later.
+  if (($? != 0) && ($docker_image !~ /^[0-9a-fA-F]{5,}$/))
+  {
+    croak("Installing Docker image $docker_image returned exit code $?");
+  }
+}
 
 foreach (qw (script script_version script_parameters runtime_constraints))
 {
@@ -603,7 +626,6 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
       qw(-n1 -c1 -N1 -D), $ENV{'TMPDIR'},
       "--job-name=$job_id.$id.$$",
        );
-    my @execargs = qw(sh);
     my $build_script_to_send = "";
     my $command =
        "if [ -e $ENV{TASK_WORK} ]; then rm -rf $ENV{TASK_WORK}; fi; "
@@ -615,8 +637,27 @@ for (my $todo_ptr = 0; $todo_ptr <= $#jobstep_todo; $todo_ptr ++)
       $command .=
          "&& perl -";
     }
-    $command .=
-        "&& exec arv-mount $ENV{TASK_KEEPMOUNT} --exec $ENV{CRUNCH_SRC}/crunch_scripts/" . $Job->{"script"};
+    $command .= "&& exec arv-mount --allow-other $ENV{TASK_KEEPMOUNT} --exec ";
+    if ($docker_image)
+    {
+      $command .= "$docker_bin run -i -a stdin -a stdout -a stderr ";
+      # Dynamically configure the container to use the host system as its
+      # DNS server.  Get the host's global addresses from the ip command,
+      # and turn them into docker --dns options using gawk.
+      $command .=
+          q{$(ip -o address show scope global |
+              gawk 'match($4, /^([0-9\.:]+)\//, x){print "--dns", x[1]}') };
+      foreach my $env_key (qw(CRUNCH_SRC CRUNCH_TMP TASK_KEEPMOUNT))
+      {
+        $command .= "-v \Q$ENV{$env_key}:$ENV{$env_key}:rw\E ";
+      }
+      while (my ($env_key, $env_val) = each %ENV)
+      {
+        $command .= "-e \Q$env_key=$env_val\E ";
+      }
+      $command .= "$docker_image ";
+    }
+    $command .= "$ENV{CRUNCH_SRC}/crunch_scripts/" . $Job->{"script"};
     my @execargs = ('bash', '-c', $command);
     srun (\@srunargs, \@execargs, undef, $build_script_to_send);
     exit (111);
diff --git a/sdk/python/.gitignore b/sdk/python/.gitignore
index 6d57899d2e27f0ce355062466800ad11f6697b35..7f9c17b7433633f9447d0b3cc575fbe5c7182bca 100644 (file)
@@ -2,4 +2,3 @@
 /dist/
 /*.egg-info
 /tmp
-setup.py
diff --git a/sdk/python/arvados/events.py b/sdk/python/arvados/events.py
new file mode 100644 (file)
index 0000000..e61b20c
--- /dev/null
@@ -0,0 +1,33 @@
+from ws4py.client.threadedclient import WebSocketClient
+import thread
+import json
+import os
+import time
+import ssl
+import re
+import config
+
+class EventClient(WebSocketClient):
+    def __init__(self, url, filters, on_event):
+        ssl_options = None
+        if re.match(r'(?i)^(true|1|yes)$',
+                    config.get('ARVADOS_API_HOST_INSECURE', 'no')):
+            ssl_options={'cert_reqs': ssl.CERT_NONE}
+        else:
+            ssl_options={'cert_reqs': ssl.CERT_REQUIRED}
+
+        super(EventClient, self).__init__(url, ssl_options)
+        self.filters = filters
+        self.on_event = on_event
+
+    def opened(self):
+        self.send(json.dumps({"method": "subscribe", "filters": self.filters}))
+
+    def received_message(self, m):
+        self.on_event(json.loads(str(m)))
+
+def subscribe(api, filters, on_event):
+    url = "{}?api_token={}".format(api._rootDesc['websocketUrl'], config.get('ARVADOS_API_TOKEN'))
+    ws = EventClient(url, filters, on_event)
+    ws.connect()
+    return ws
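
A minimal sketch of using the new arvados.events.subscribe() helper added above (this assumes an API server that advertises websocketUrl in its discovery document and valid credentials in the environment):

<pre>
import arvados
import arvados.events

def on_event(ev):
    # Each matching event is delivered as a decoded JSON dict.
    print ev

api = arvados.api('v1')
# subscribe() opens the websocket connection and returns the EventClient.
ws = arvados.events.subscribe(api, [['object_uuid', 'is_a', 'arvados#link']], on_event)
</pre>
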
diff --git a/sdk/python/arvados/fuse.py b/sdk/python/arvados/fuse.py
deleted file mode 100644 (file)
index 983dc2e..0000000
+++ /dev/null
@@ -1,317 +0,0 @@
-#
-# FUSE driver for Arvados Keep
-#
-
-import os
-import sys
-
-import llfuse
-import errno
-import stat
-import threading
-import arvados
-import pprint
-
-from time import time
-from llfuse import FUSEError
-
-class Directory(object):
-    '''Generic directory object, backed by a dict.
-    Consists of a set of entries with the key representing the filename
-    and the value referencing a File or Directory object.
-    '''
-
-    def __init__(self, parent_inode):
-        self.inode = None
-        self.parent_inode = parent_inode
-        self._entries = {}
-
-    def __getitem__(self, item):
-        return self._entries[item]
-
-    def __setitem__(self, key, item):
-        self._entries[key] = item
-
-    def __iter__(self):
-        return self._entries.iterkeys()
-
-    def items(self):
-        return self._entries.items()
-
-    def __contains__(self, k):
-        return k in self._entries
-
-    def size(self):
-        return 0
-
-class MagicDirectory(Directory):
-    '''A special directory that logically contains the set of all extant
-    keep locators.  When a file is referenced by lookup(), it is tested
-    to see if it is a valid keep locator to a manifest, and if so, loads the manifest
-    contents as a subdirectory of this directory with the locator as the directory name.
-    Since querying a list of all extant keep locators is impractical, only loaded collections 
-    are visible to readdir().'''
-
-    def __init__(self, parent_inode, inodes):
-        super(MagicDirectory, self).__init__(parent_inode)
-        self.inodes = inodes
-
-    def __contains__(self, k):
-        if k in self._entries:
-            return True
-        try:
-            if arvados.Keep.get(k):
-                return True
-            else:
-                return False
-        except Exception as e:
-            #print 'exception keep', e
-            return False
-
-    def __getitem__(self, item):
-        if item not in self._entries:
-            collection = arvados.CollectionReader(arvados.Keep.get(item))
-            self._entries[item] = self.inodes.add_entry(Directory(self.inode))
-            self.inodes.load_collection(self._entries[item], collection)
-        return self._entries[item]
-
-class File(object):
-    '''Wraps a StreamFileReader for use by Directory.'''
-
-    def __init__(self, parent_inode, reader):
-        self.inode = None
-        self.parent_inode = parent_inode
-        self.reader = reader
-
-    def size(self):
-        return self.reader.size()
-
-class FileHandle(object):
-    '''Connects a numeric file handle to a File or Directory object that has 
-    been opened by the client.'''
-
-    def __init__(self, fh, entry):
-        self.fh = fh
-        self.entry = entry
-
-class Inodes(object):
-    '''Manage the set of inodes.  This is the mapping from a numeric id 
-    to a concrete File or Directory object'''
-
-    def __init__(self):
-        self._entries = {}
-        self._counter = llfuse.ROOT_INODE
-
-    def __getitem__(self, item):
-        return self._entries[item]
-
-    def __setitem__(self, key, item):
-        self._entries[key] = item
-
-    def __iter__(self):
-        return self._entries.iterkeys()
-
-    def items(self):
-        return self._entries.items()
-
-    def __contains__(self, k):
-        return k in self._entries
-
-    def load_collection(self, parent_dir, collection):
-        '''parent_dir is the Directory object that will be populated by the collection.
-        collection is the arvados.CollectionReader to use as the source'''
-        for s in collection.all_streams():
-            cwd = parent_dir
-            for part in s.name().split('/'):
-                if part != '' and part != '.':
-                    if part not in cwd:
-                        cwd[part] = self.add_entry(Directory(cwd.inode))
-                    cwd = cwd[part]
-            for k, v in s.files().items():
-                cwd[k] = self.add_entry(File(cwd.inode, v))
-
-    def add_entry(self, entry):
-        entry.inode = self._counter
-        self._entries[entry.inode] = entry
-        self._counter += 1
-        return entry    
-
-class Operations(llfuse.Operations):
-    '''This is the main interface with llfuse.  The methods on this object are
-    called by llfuse threads to service FUSE events to query and read from 
-    the file system.
-
-    llfuse has its own global lock which is acquired before calling a request handler,
-    so request handlers do not run concurrently unless the lock is explicitly released 
-    with llfuse.lock_released.'''
-
-    def __init__(self, uid, gid):
-        super(Operations, self).__init__()
-
-        self.inodes = Inodes()
-        self.uid = uid
-        self.gid = gid
-        
-        # dict of inode to filehandle
-        self._filehandles = {}
-        self._filehandles_counter = 1
-
-        # Other threads that need to wait until the fuse driver
-        # is fully initialized should wait() on this event object.
-        self.initlock = threading.Event()
-
-    def init(self):
-        # Allow threads that are waiting for the driver to be finished
-        # initializing to continue
-        self.initlock.set()
-
-    def access(self, inode, mode, ctx):
-        return True
-   
-    def getattr(self, inode):
-        e = self.inodes[inode]
-
-        entry = llfuse.EntryAttributes()
-        entry.st_ino = inode
-        entry.generation = 0
-        entry.entry_timeout = 300
-        entry.attr_timeout = 300
-
-        entry.st_mode = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
-        if isinstance(e, Directory):
-            entry.st_mode |= stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH | stat.S_IFDIR
-        else:
-            entry.st_mode |= stat.S_IFREG
-
-        entry.st_nlink = 1
-        entry.st_uid = self.uid
-        entry.st_gid = self.gid
-        entry.st_rdev = 0
-
-        entry.st_size = e.size()
-
-        entry.st_blksize = 1024
-        entry.st_blocks = e.size()/1024
-        if e.size()/1024 != 0:
-            entry.st_blocks += 1
-        entry.st_atime = 0
-        entry.st_mtime = 0
-        entry.st_ctime = 0
-
-        return entry
-
-    def lookup(self, parent_inode, name):
-        #print "lookup: parent_inode", parent_inode, "name", name
-        inode = None
-
-        if name == '.':
-            inode = parent_inode
-        else:
-            if parent_inode in self.inodes:
-                p = self.inodes[parent_inode]
-                if name == '..':
-                    inode = p.parent_inode
-                elif name in p:
-                    inode = p[name].inode
-
-        if inode != None:
-            return self.getattr(inode)
-        else:
-            raise llfuse.FUSEError(errno.ENOENT)
-   
-    def open(self, inode, flags):
-        if inode in self.inodes:
-            p = self.inodes[inode]
-        else:
-            raise llfuse.FUSEError(errno.ENOENT)
-
-        if (flags & os.O_WRONLY) or (flags & os.O_RDWR):
-            raise llfuse.FUSEError(errno.EROFS)
-
-        if isinstance(p, Directory):
-            raise llfuse.FUSEError(errno.EISDIR)
-
-        fh = self._filehandles_counter
-        self._filehandles_counter += 1
-        self._filehandles[fh] = FileHandle(fh, p)
-        return fh
-
-    def read(self, fh, off, size):
-        #print "read", fh, off, size
-        if fh in self._filehandles:
-            handle = self._filehandles[fh]
-        else:
-            raise llfuse.FUSEError(errno.EBADF)
-
-        try:
-            with llfuse.lock_released:
-                return handle.entry.reader.readfrom(off, size)
-        except:
-            raise llfuse.FUSEError(errno.EIO)
-
-    def release(self, fh):
-        if fh in self._filehandles:
-            del self._filehandles[fh]
-
-    def opendir(self, inode):
-        #print "opendir: inode", inode
-
-        if inode in self.inodes:
-            p = self.inodes[inode]
-        else:
-            raise llfuse.FUSEError(errno.ENOENT)
-
-        if not isinstance(p, Directory):
-            raise llfuse.FUSEError(errno.ENOTDIR)
-
-        fh = self._filehandles_counter
-        self._filehandles_counter += 1
-        if p.parent_inode in self.inodes:
-            parent = self.inodes[p.parent_inode]
-        else:
-            parent = None
-        self._filehandles[fh] = FileHandle(fh, [('.', p), ('..', parent)] + list(p.items()))
-        return fh
-
-    def readdir(self, fh, off):
-        #print "readdir: fh", fh, "off", off
-
-        if fh in self._filehandles:
-            handle = self._filehandles[fh]
-        else:
-            raise llfuse.FUSEError(errno.EBADF)
-
-        #print "handle.entry", handle.entry
-
-        e = off
-        while e < len(handle.entry):
-            yield (handle.entry[e][0], self.getattr(handle.entry[e][1].inode), e+1)
-            e += 1
-
-    def releasedir(self, fh):
-        del self._filehandles[fh]
-
-    def statfs(self):
-        st = llfuse.StatvfsData()
-        st.f_bsize = 1024 * 1024
-        st.f_blocks = 0
-        st.f_files = 0
-
-        st.f_bfree = 0
-        st.f_bavail = 0
-
-        st.f_ffree = 0
-        st.f_favail = 0
-
-        st.f_frsize = 0
-        return st
-
-    # The llfuse documentation recommends only overloading functions that
-    # are actually implemented, as the default implementation will raise ENOSYS.
-    # However, there is a bug in the llfuse default implementation of create()
-    # "create() takes exactly 5 positional arguments (6 given)" which will crash
-    # arv-mount.
-    # The workaround is to implement it with the proper number of parameters,
-    # and then everything works out.
-    def create(self, p1, p2, p3, p4, p5):
-        raise llfuse.FUSEError(errno.EROFS)
diff --git a/sdk/python/arvados/fuse/__init__.py b/sdk/python/arvados/fuse/__init__.py
new file mode 100644 (file)
index 0000000..8b734f2
--- /dev/null
@@ -0,0 +1,585 @@
+#
+# FUSE driver for Arvados Keep
+#
+
+import os
+import sys
+
+import llfuse
+import errno
+import stat
+import threading
+import arvados
+import pprint
+import arvados.events
+import re
+import apiclient
+import json
+
+from time import time
+from llfuse import FUSEError
+
+class FreshBase(object):
+    '''Base class for maintaining fresh/stale state to determine when to update.'''
+    def __init__(self):
+        self._stale = True
+        self._poll = False
+        self._last_update = time()
+        self._poll_time = 60
+
+    # Mark the value as stale
+    def invalidate(self):
+        self._stale = True
+
+    # Test if the entries dict is stale
+    def stale(self):
+        if self._stale:
+            return True
+        if self._poll:
+            return (self._last_update + self._poll_time) < time()
+        return False
+
+    def fresh(self):
+        self._stale = False
+        self._last_update = time()
+
+
+class File(FreshBase):
+    '''Base for file objects.'''
+
+    def __init__(self, parent_inode):
+        super(File, self).__init__()
+        self.inode = None
+        self.parent_inode = parent_inode
+
+    def size(self):
+        return 0
+
+    def readfrom(self, off, size):
+        return ''
+
+
+class StreamReaderFile(File):
+    '''Wraps a StreamFileReader as a file.'''
+
+    def __init__(self, parent_inode, reader):
+        super(StreamReaderFile, self).__init__(parent_inode)
+        self.reader = reader
+
+    def size(self):
+        return self.reader.size()
+
+    def readfrom(self, off, size):
+        return self.reader.readfrom(off, size)
+
+    def stale(self):
+        return False
+
+
+class ObjectFile(File):
+    '''Wraps a dict as a serialized json object.'''
+
+    def __init__(self, parent_inode, contents):
+        super(ObjectFile, self).__init__(parent_inode)
+        self.contentsdict = contents
+        self.uuid = self.contentsdict['uuid']
+        self.contents = json.dumps(self.contentsdict, indent=4, sort_keys=True)
+
+    def size(self):
+        return len(self.contents)
+
+    def readfrom(self, off, size):
+        return self.contents[off:(off+size)]
+
+
+class Directory(FreshBase):
+    '''Generic directory object, backed by a dict.
+    Consists of a set of entries with the key representing the filename
+    and the value referencing a File or Directory object.
+    '''
+
+    def __init__(self, parent_inode):
+        super(Directory, self).__init__()
+
+        '''parent_inode is the integer inode number'''
+        self.inode = None
+        if not isinstance(parent_inode, int):
+            raise Exception("parent_inode should be an int")
+        self.parent_inode = parent_inode
+        self._entries = {}
+
+    #  Overriden by subclasses to implement logic to update the entries dict
+    #  when the directory is stale
+    def update(self):
+        pass
+
+    # Only used when computing the size of the disk footprint of the directory
+    # (stub)
+    def size(self):
+        return 0
+
+    def checkupdate(self):
+        if self.stale():
+            try:
+                self.update()
+            except apiclient.errors.HttpError as e:
+                print e
+
+    def __getitem__(self, item):
+        self.checkupdate()
+        return self._entries[item]
+
+    def items(self):
+        self.checkupdate()
+        return self._entries.items()
+
+    def __iter__(self):
+        self.checkupdate()
+        return self._entries.iterkeys()
+
+    def __contains__(self, k):
+        self.checkupdate()
+        return k in self._entries
+
+    def merge(self, items, fn, same, new_entry):
+        '''Helper method for updating the contents of the directory.
+
+        items: array with new directory contents
+
+        fn: function to take an entry in 'items' and return the desired file or
+        directory name
+
+        same: function to compare an existing entry with an entry in the items
+        list to determine whether to keep the existing entry.
+
+        new_entry: function to create a new directory entry from array entry.
+        '''
+
+        oldentries = self._entries
+        self._entries = {}
+        for i in items:
+            n = fn(i)
+            if n in oldentries and same(oldentries[n], i):
+                self._entries[n] = oldentries[n]
+                del oldentries[n]
+            else:
+                self._entries[n] = self.inodes.add_entry(new_entry(i))
+        for n in oldentries:
+            llfuse.invalidate_entry(self.inode, str(n))
+            self.inodes.del_entry(oldentries[n])
+        self.fresh()
+
+
+class CollectionDirectory(Directory):
+    '''Represents the root of a directory tree holding a collection.'''
+
+    def __init__(self, parent_inode, inodes, collection_locator):
+        super(CollectionDirectory, self).__init__(parent_inode)
+        self.inodes = inodes
+        self.collection_locator = collection_locator
+
+    def same(self, i):
+        return i['uuid'] == self.collection_locator
+
+    def update(self):
+        collection = arvados.CollectionReader(arvados.Keep.get(self.collection_locator))
+        for s in collection.all_streams():
+            cwd = self
+            for part in s.name().split('/'):
+                if part != '' and part != '.':
+                    if part not in cwd._entries:
+                        cwd._entries[part] = self.inodes.add_entry(Directory(cwd.inode))
+                    cwd = cwd._entries[part]
+            for k, v in s.files().items():
+                cwd._entries[k] = self.inodes.add_entry(StreamReaderFile(cwd.inode, v))
+        self.fresh()
+
+
+class MagicDirectory(Directory):
+    '''A special directory that logically contains the set of all extant keep
+    locators.  When a file is referenced by lookup(), it is tested to see if it
+    is a valid keep locator to a manifest, and if so, loads the manifest
+    contents as a subdirectory of this directory with the locator as the
+    directory name.  Since querying a list of all extant keep locators is
+    impractical, only collections that have already been accessed are visible
+    to readdir().
+    '''
+
+    def __init__(self, parent_inode, inodes):
+        super(MagicDirectory, self).__init__(parent_inode)
+        self.inodes = inodes
+
+    def __contains__(self, k):
+        if k in self._entries:
+            return True
+        try:
+            if arvados.Keep.get(k):
+                return True
+            else:
+                return False
+        except Exception as e:
+            #print 'exception keep', e
+            return False
+
+    def __getitem__(self, item):
+        if item not in self._entries:
+            self._entries[item] = self.inodes.add_entry(CollectionDirectory(self.inode, self.inodes, item))
+        return self._entries[item]
+
+
+class TagsDirectory(Directory):
+    '''A special directory that contains as subdirectories all tags visible to the user.'''
+
+    def __init__(self, parent_inode, inodes, api, poll_time=60):
+        super(TagsDirectory, self).__init__(parent_inode)
+        self.inodes = inodes
+        self.api = api
+        try:
+            arvados.events.subscribe(self.api, [['object_uuid', 'is_a', 'arvados#link']], lambda ev: self.invalidate())
+        except:
+            self._poll = True
+            self._poll_time = poll_time
+
+    def invalidate(self):
+        with llfuse.lock:
+            super(TagsDirectory, self).invalidate()
+            for a in self._entries:
+                self._entries[a].invalidate()
+
+    def update(self):
+        tags = self.api.links().list(filters=[['link_class', '=', 'tag']], select=['name'], distinct = True).execute()
+        self.merge(tags['items'],
+                   lambda i: i['name'],
+                   lambda a, i: a.tag == i,
+                   lambda i: TagDirectory(self.inode, self.inodes, self.api, i['name'], poll=self._poll, poll_time=self._poll_time))
+
+class TagDirectory(Directory):
+    '''A special directory that contains as subdirectories all collections visible
+    to the user that are tagged with a particular tag.
+    '''
+
+    def __init__(self, parent_inode, inodes, api, tag, poll=False, poll_time=60):
+        super(TagDirectory, self).__init__(parent_inode)
+        self.inodes = inodes
+        self.api = api
+        self.tag = tag
+        self._poll = poll
+        self._poll_time = poll_time
+
+    def update(self):
+        taggedcollections = self.api.links().list(filters=[['link_class', '=', 'tag'],
+                                               ['name', '=', self.tag],
+                                               ['head_uuid', 'is_a', 'arvados#collection']],
+                                      select=['head_uuid']).execute()
+        self.merge(taggedcollections['items'],
+                   lambda i: i['head_uuid'],
+                   lambda a, i: a.collection_locator == i['head_uuid'],
+                   lambda i: CollectionDirectory(self.inode, self.inodes, i['head_uuid']))
+
+
+class GroupsDirectory(Directory):
+    '''A special directory that contains as subdirectories all groups visible to the user.'''
+
+    def __init__(self, parent_inode, inodes, api, poll_time=60):
+        super(GroupsDirectory, self).__init__(parent_inode)
+        self.inodes = inodes
+        self.api = api
+        try:
+            arvados.events.subscribe(self.api, [], lambda ev: self.invalidate())
+        except:
+            self._poll = True
+            self._poll_time = poll_time
+
+    def invalidate(self):
+        with llfuse.lock:
+            super(GroupsDirectory, self).invalidate()
+            for a in self._entries:
+                self._entries[a].invalidate()
+
+    def update(self):
+        groups = self.api.groups().list().execute()
+        self.merge(groups['items'],
+                   lambda i: i['uuid'],
+                   lambda a, i: a.uuid == i['uuid'],
+                   lambda i: GroupDirectory(self.inode, self.inodes, self.api, i, poll=self._poll, poll_time=self._poll_time))
+
+
+class GroupDirectory(Directory):
+    '''A special directory that contains the contents of a group.'''
+
+    def __init__(self, parent_inode, inodes, api, uuid, poll=False, poll_time=60):
+        super(GroupDirectory, self).__init__(parent_inode)
+        self.inodes = inodes
+        self.api = api
+        self.uuid = uuid['uuid']
+        self._poll = poll
+        self._poll_time = poll_time
+
+    def invalidate(self):
+        with llfuse.lock:
+            super(GroupDirectory, self).invalidate()
+            for a in self._entries:
+                self._entries[a].invalidate()
+
+    def createDirectory(self, i):
+        if re.match(r'[0-9a-f]{32}\+\d+', i['uuid']):
+            return CollectionDirectory(self.inode, self.inodes, i['uuid'])
+        elif re.match(r'[a-z0-9]{5}-j7d0g-[a-z0-9]{15}', i['uuid']):
+            return GroupDirectory(self.parent_inode, self.inodes, self.api, i, self._poll, self._poll_time)
+        elif re.match(r'[a-z0-9]{5}-[a-z0-9]{5}-[a-z0-9]{15}', i['uuid']):
+            return ObjectFile(self.parent_inode, i)
+        return None
+
+    def update(self):
+        contents = self.api.groups().contents(uuid=self.uuid, include_linked=True).execute()
+        links = {}
+        for a in contents['links']:
+            links[a['head_uuid']] = a['name']
+
+        def choose_name(i):
+            if i['uuid'] in links:
+                return links[i['uuid']]
+            else:
+                return i['uuid']
+
+        def same(a, i):
+            if isinstance(a, CollectionDirectory):
+                return a.collection_locator == i['uuid']
+            elif isinstance(a, GroupDirectory):
+                return a.uuid == i['uuid']
+            elif isinstance(a, ObjectFile):
+                return a.uuid == i['uuid'] and not a.stale()
+            return False
+
+        self.merge(contents['items'],
+                   choose_name,
+                   same,
+                   self.createDirectory)
+
+
+class FileHandle(object):
+    '''Connects a numeric file handle to a File or Directory object that has
+    been opened by the client.'''
+
+    def __init__(self, fh, entry):
+        self.fh = fh
+        self.entry = entry
+
+
+class Inodes(object):
+    '''Manage the set of inodes.  This is the mapping from a numeric id
+    to a concrete File or Directory object'''
+
+    def __init__(self):
+        self._entries = {}
+        self._counter = llfuse.ROOT_INODE
+
+    def __getitem__(self, item):
+        return self._entries[item]
+
+    def __setitem__(self, key, item):
+        self._entries[key] = item
+
+    def __iter__(self):
+        return self._entries.iterkeys()
+
+    def items(self):
+        return self._entries.items()
+
+    def __contains__(self, k):
+        return k in self._entries
+
+    def add_entry(self, entry):
+        entry.inode = self._counter
+        self._entries[entry.inode] = entry
+        self._counter += 1
+        return entry
+
+    def del_entry(self, entry):
+        llfuse.invalidate_inode(entry.inode)
+        del self._entries[entry.inode]
+
+class Operations(llfuse.Operations):
+    '''This is the main interface with llfuse.  The methods on this object are
+    called by llfuse threads to service FUSE events to query and read from
+    the file system.
+
+    llfuse has its own global lock which is acquired before calling a request handler,
+    so request handlers do not run concurrently unless the lock is explicitly released
+    with llfuse.lock_released.'''
+
+    def __init__(self, uid, gid):
+        super(Operations, self).__init__()
+
+        self.inodes = Inodes()
+        self.uid = uid
+        self.gid = gid
+
+        # dict of inode to filehandle
+        self._filehandles = {}
+        self._filehandles_counter = 1
+
+        # Other threads that need to wait until the fuse driver
+        # is fully initialized should wait() on this event object.
+        self.initlock = threading.Event()
+
+    def init(self):
+        # Allow threads that are waiting for the driver to be finished
+        # initializing to continue
+        self.initlock.set()
+
+    def access(self, inode, mode, ctx):
+        return True
+
+    def getattr(self, inode):
+        if inode not in self.inodes:
+            raise llfuse.FUSEError(errno.ENOENT)
+
+        e = self.inodes[inode]
+
+        entry = llfuse.EntryAttributes()
+        entry.st_ino = inode
+        entry.generation = 0
+        entry.entry_timeout = 300
+        entry.attr_timeout = 300
+
+        entry.st_mode = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
+        if isinstance(e, Directory):
+            entry.st_mode |= stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH | stat.S_IFDIR
+        else:
+            entry.st_mode |= stat.S_IFREG
+
+        entry.st_nlink = 1
+        entry.st_uid = self.uid
+        entry.st_gid = self.gid
+        entry.st_rdev = 0
+
+        entry.st_size = e.size()
+
+        entry.st_blksize = 1024
+        entry.st_blocks = e.size()/1024
+        if e.size()/1024 != 0:
+            entry.st_blocks += 1
+        entry.st_atime = 0
+        entry.st_mtime = 0
+        entry.st_ctime = 0
+
+        return entry
+
+    def lookup(self, parent_inode, name):
+        #print "lookup: parent_inode", parent_inode, "name", name
+        inode = None
+
+        if name == '.':
+            inode = parent_inode
+        else:
+            if parent_inode in self.inodes:
+                p = self.inodes[parent_inode]
+                if name == '..':
+                    inode = p.parent_inode
+                elif name in p:
+                    inode = p[name].inode
+
+        if inode != None:
+            return self.getattr(inode)
+        else:
+            raise llfuse.FUSEError(errno.ENOENT)
+
+    def open(self, inode, flags):
+        if inode in self.inodes:
+            p = self.inodes[inode]
+        else:
+            raise llfuse.FUSEError(errno.ENOENT)
+
+        if (flags & os.O_WRONLY) or (flags & os.O_RDWR):
+            raise llfuse.FUSEError(errno.EROFS)
+
+        if isinstance(p, Directory):
+            raise llfuse.FUSEError(errno.EISDIR)
+
+        fh = self._filehandles_counter
+        self._filehandles_counter += 1
+        self._filehandles[fh] = FileHandle(fh, p)
+        return fh
+
+    def read(self, fh, off, size):
+        #print "read", fh, off, size
+        if fh in self._filehandles:
+            handle = self._filehandles[fh]
+        else:
+            raise llfuse.FUSEError(errno.EBADF)
+
+        try:
+            with llfuse.lock_released:
+                return handle.entry.readfrom(off, size)
+        except:
+            raise llfuse.FUSEError(errno.EIO)
+
+    def release(self, fh):
+        if fh in self._filehandles:
+            del self._filehandles[fh]
+
+    def opendir(self, inode):
+        #print "opendir: inode", inode
+
+        if inode in self.inodes:
+            p = self.inodes[inode]
+        else:
+            raise llfuse.FUSEError(errno.ENOENT)
+
+        if not isinstance(p, Directory):
+            raise llfuse.FUSEError(errno.ENOTDIR)
+
+        fh = self._filehandles_counter
+        self._filehandles_counter += 1
+        if p.parent_inode in self.inodes:
+            parent = self.inodes[p.parent_inode]
+        else:
+            raise llfuse.FUSEError(errno.EIO)
+
+        self._filehandles[fh] = FileHandle(fh, [('.', p), ('..', parent)] + list(p.items()))
+        return fh
+
+    def readdir(self, fh, off):
+        #print "readdir: fh", fh, "off", off
+
+        if fh in self._filehandles:
+            handle = self._filehandles[fh]
+        else:
+            raise llfuse.FUSEError(errno.EBADF)
+
+        #print "handle.entry", handle.entry
+
+        e = off
+        while e < len(handle.entry):
+            if handle.entry[e][1].inode in self.inodes:
+                yield (handle.entry[e][0], self.getattr(handle.entry[e][1].inode), e+1)
+            e += 1
+
+    def releasedir(self, fh):
+        del self._filehandles[fh]
+
+    def statfs(self):
+        st = llfuse.StatvfsData()
+        st.f_bsize = 1024 * 1024
+        st.f_blocks = 0
+        st.f_files = 0
+
+        st.f_bfree = 0
+        st.f_bavail = 0
+
+        st.f_ffree = 0
+        st.f_favail = 0
+
+        st.f_frsize = 0
+        return st
+
+    # The llfuse documentation recommends only overloading functions that
+    # are actually implemented, as the default implementation will raise ENOSYS.
+    # However, there is a bug in the llfuse default implementation of create()
+    # "create() takes exactly 5 positional arguments (6 given)" which will crash
+    # arv-mount.
+    # The workaround is to implement it with the proper number of parameters,
+    # and then everything works out.
+    def create(self, p1, p2, p3, p4, p5):
+        raise llfuse.FUSEError(errno.EROFS)
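
As a quick illustration of the "magic directory" behavior implemented above: looking up a collection locator under the mount point triggers CollectionDirectory.update(), which fetches the manifest from Keep and exposes its streams and files as a directory tree; until a locator has been looked up, it is not listed by readdir(). A minimal sketch, assuming a hypothetical arv-mount mount point at /mnt/keep:

<pre>
import os

mount = '/mnt/keep'                              # hypothetical mount point (magic directory layout)
locator = 'f815ec01d5d2f11cb12874ab2ed50daa'     # hypothetical collection locator
# The lookup below loads the collection into the mount and lists its contents.
print os.listdir(os.path.join(mount, locator))
</pre>
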
diff --git a/sdk/python/bin/arv-mount b/sdk/python/bin/arv-mount
index 5e773dfbc6c0185f32b02b76ab3255692a8b4cbe..e7e559cb6e4b903867c88a072e443d212a83b1b1 100755 (executable)
@@ -1,22 +1,33 @@
 #!/usr/bin/env python
 
-from arvados.fuse import * 
+from arvados.fuse import *
 import arvados
 import subprocess
 import argparse
+import daemon
 
 if __name__ == '__main__':
     # Handle command line parameters
     parser = argparse.ArgumentParser(
-        description='Mount Keep data under the local filesystem.',
+        description='''Mount Keep data under the local filesystem.  By default, if none of
+        --collection, --tags, or --groups is specified, this mounts a virtual directory
+        under which every Keep collection is available as a subdirectory named by its
+        Keep locator; however, these directories are not visible to 'ls' until a
+        program tries to access them.''',
         epilog="""
 Note: When using the --exec feature, you must either specify the
 mountpoint before --exec, or mark the end of your --exec arguments
 with "--".
 """)
     parser.add_argument('mountpoint', type=str, help="""Mount point.""")
-    parser.add_argument('--collection', type=str, help="""Collection locator""")
+    parser.add_argument('--allow-other', action='store_true',
+                        help="""Let other users read the mount""")
+    parser.add_argument('--collection', type=str, help="""Mount only the specified collection at the mount point.""")
+    parser.add_argument('--tags', action='store_true',
+                        help="""Mount as a virtual directory consisting of subdirectories representing tagged collections on the server.""")
+    parser.add_argument('--groups', action='store_true', help="""Mount as a virtual directory consisting of subdirectories representing groups on the server.""")
     parser.add_argument('--debug', action='store_true', help="""Debug mode""")
+    parser.add_argument('--foreground', action='store_true', help="""Run in the foreground (default is to daemonize unless --exec is specified)""", default=False)
     parser.add_argument('--exec', type=str, nargs=argparse.REMAINDER,
                         dest="exec_args", metavar=('command', 'args', '...', '--'),
                         help="""Mount, run a command, then unmount and exit""")
@@ -26,25 +37,27 @@ with "--".
     # Create the request handler
     operations = Operations(os.getuid(), os.getgid())
 
-    if args.collection != None:
+    if args.groups:
+        api = arvados.api('v1')
+        e = operations.inodes.add_entry(GroupsDirectory(llfuse.ROOT_INODE, operations.inodes, api))
+    elif args.tags:
+        api = arvados.api('v1')
+        e = operations.inodes.add_entry(TagsDirectory(llfuse.ROOT_INODE, operations.inodes, api))
+    elif args.collection is not None:
         # Set up the request handler with the collection at the root
-        e = operations.inodes.add_entry(Directory(llfuse.ROOT_INODE))
-        operations.inodes.load_collection(e, arvados.CollectionReader(arvados.Keep.get(args.collection)))
+        e = operations.inodes.add_entry(CollectionDirectory(llfuse.ROOT_INODE, operations.inodes, args.collection))
     else:
         # Set up the request handler with the 'magic directory' at the root
         operations.inodes.add_entry(MagicDirectory(llfuse.ROOT_INODE, operations.inodes))
 
     # FUSE options, see mount.fuse(8)
-    opts = []
-
-    # Enable FUSE debugging (logs each FUSE request)
-    if args.debug:
-        opts += ['debug']    
-    
-    # Initialize the fuse connection
-    llfuse.init(operations, args.mountpoint, opts)
+    opts = [optname for optname in ['allow_other', 'debug']
+            if getattr(args, optname)]
 
     if args.exec_args:
+        # Initialize the fuse connection
+        llfuse.init(operations, args.mountpoint, opts)
+
         t = threading.Thread(None, lambda: llfuse.main())
         t.start()
 
@@ -64,4 +77,12 @@ with "--".
 
         exit(rc)
     else:
-        llfuse.main()
+        if args.foreground:
+            # Initialize the fuse connection
+            llfuse.init(operations, args.mountpoint, opts)
+            llfuse.main()
+        else:
+            with daemon.DaemonContext():
+                # Initialize the fuse connection
+                llfuse.init(operations, args.mountpoint, opts)
+                llfuse.main()
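
For context, a rough sketch (not part of this commit) of driving the new mount modes from Python; arv-mount is assumed to be on PATH, the mount points are hypothetical empty directories, and the collection locator is a placeholder. Without --foreground or --exec, each call daemonizes and returns once the filesystem is mounted.

    import subprocess

    # Tagged collections and groups exposed as virtual directories.
    subprocess.check_call(['arv-mount', '--tags', '/mnt/keep-tags'])
    subprocess.check_call(['arv-mount', '--groups', '/mnt/keep-groups'])

    # Mount a single collection, run a command against it, then unmount and exit.
    subprocess.check_call(['arv-mount', '/mnt/one-collection',
                           '--collection', '1f4b0bc7583c2a7f9102c395f4ffc5e3+45',
                           '--exec', 'ls', '-l', '/mnt/one-collection'])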
diff --git a/sdk/python/build.sh b/sdk/python/build.sh
deleted file mode 100755 (executable)
index 4808954..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-#!/bin/sh
-#
-# Apparently the only reliable way to distribute Python packages with pypi and
-# install them via pip is as source packages (sdist).
-#
-# That means that setup.py is run on the system the package is being installed on,
-# outside of the Arvados git tree.
-#
-# In turn, this means that we can not build the minor_version on the fly when
-# setup.py is being executed. Instead, we use this script to generate a 'static'
-# version of setup.py which will can be distributed via pypi.
-
-minor_version=`git log --format=format:%ct.%h -n1 .`
-
-sed "s|%%MINOR_VERSION%%|$minor_version|" < setup.py.src > setup.py
-
index 16dcffeffc7c82f65010de1f0249f2bdae87b390..a6a7591c8d11c8cdb77c5dffe9a34de55daa8355 100644 (file)
@@ -3,3 +3,6 @@ httplib2==0.8
 python-gflags==2.0
 urllib3==1.7.1
 llfuse==0.40
+ws4py==0.3.4
+PyYAML==3.11
+python-daemon==1.6
diff --git a/sdk/python/run_test_server.py b/sdk/python/run_test_server.py
new file mode 100644 (file)
index 0000000..b4886f0
--- /dev/null
@@ -0,0 +1,86 @@
+import subprocess
+import time
+import os
+import signal
+import yaml
+import sys
+import argparse
+import arvados.config
+
+ARV_API_SERVER_DIR = '../../services/api'
+SERVER_PID_PATH = 'tmp/pids/server.pid'
+WEBSOCKETS_SERVER_PID_PATH = 'tmp/pids/passenger.3001.pid'
+
+def find_server_pid(PID_PATH):
+    timeout = time.time() + 10
+    good_pid = False
+    while (not good_pid) and (time.time() < timeout):
+        time.sleep(0.2)
+        try:
+            with open(PID_PATH, 'r') as f:
+                server_pid = int(f.read())
+            good_pid = (os.kill(server_pid, 0) is None)
+        except Exception:
+            good_pid = False
+
+    if not good_pid:
+        raise Exception("could not find API server Rails pid")
+
+    return server_pid
+
+def run(websockets=False):
+    cwd = os.getcwd()
+    os.chdir(ARV_API_SERVER_DIR)
+    os.environ["RAILS_ENV"] = "test"
+    subprocess.call(['bundle', 'exec', 'rake', 'db:test:load'])
+    subprocess.call(['bundle', 'exec', 'rake', 'db:fixtures:load'])
+    if websockets:
+        os.environ["ARVADOS_WEBSOCKETS"] = "true"
+        subprocess.call(['openssl', 'req', '-new', '-x509', '-nodes',
+                         '-out', './self-signed.pem',
+                         '-keyout', './self-signed.key',
+                         '-days', '3650',
+                         '-subj', '/CN=localhost'])
+        subprocess.call(['passenger', 'start', '-d', '-p3001', '--ssl',
+                         '--ssl-certificate', 'self-signed.pem',
+                         '--ssl-certificate-key', 'self-signed.key'])
+        find_server_pid(WEBSOCKETS_SERVER_PID_PATH)
+    else:
+        subprocess.call(['bundle', 'exec', 'rails', 'server', '-d', '-p3001'])
+        find_server_pid(SERVER_PID_PATH)
+    #os.environ["ARVADOS_API_HOST"] = "localhost:3001"
+    os.environ["ARVADOS_API_HOST"] = "127.0.0.1:3001"
+    os.environ["ARVADOS_API_HOST_INSECURE"] = "true"
+    os.chdir(cwd)
+
+def stop(websockets=False):
+    cwd = os.getcwd()
+    os.chdir(ARV_API_SERVER_DIR)
+    if websockets:
+        os.kill(find_server_pid(WEBSOCKETS_SERVER_PID_PATH), signal.SIGTERM)
+        os.unlink('self-signed.pem')
+        os.unlink('self-signed.key')
+    else:
+        os.kill(find_server_pid(SERVER_PID_PATH), signal.SIGTERM)
+    os.chdir(cwd)
+
+def fixture(fix):
+    '''Load a fixture YAML file.'''
+    with open(os.path.join(ARV_API_SERVER_DIR, "test", "fixtures",
+                           fix + ".yml")) as f:
+        return yaml.load(f.read())
+
+def authorize_with(token):
+    '''token is the symbolic name of the token from the api_client_authorizations fixture'''
+    arvados.config.settings()["ARVADOS_API_TOKEN"] = fixture("api_client_authorizations")[token]["api_token"]
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    parser.add_argument('action', type=str, help='''one of "start" or "stop"''')
+    parser.add_argument('--websockets', action='store_true', default=False)
+    args = parser.parse_args()
+
+    if args.action == 'start':
+        run(args.websockets)
+    elif args.action == 'stop':
+        stop(args.websockets)
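
A minimal sketch of how a test module might use this helper, mirroring test_mount.py and test_websockets.py below; the test class name is made up, and "admin" refers to a token in the api_client_authorizations fixture.

    import unittest
    import arvados
    import run_test_server

    class ExampleApiTest(unittest.TestCase):
        def setUp(self):
            run_test_server.run()                    # start the test API server
            run_test_server.authorize_with("admin")  # pick a token from the fixtures

        def runTest(self):
            api = arvados.api('v1')
            h = api.humans().create(body={}).execute()
            self.assertIn('uuid', h)

        def tearDown(self):
            run_test_server.stop()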
diff --git a/sdk/python/setup.py b/sdk/python/setup.py
new file mode 100644 (file)
index 0000000..e8bcb5c
--- /dev/null
@@ -0,0 +1,26 @@
+from setuptools import setup
+import setup_header
+
+setup(name='arvados-python-client',
+      version='0.1.' + setup_header.minor_version,
+      description='Arvados client library',
+      author='Arvados',
+      author_email='info@arvados.org',
+      url="https://arvados.org",
+      download_url="https://github.com/curoverse/arvados.git",
+      license='Apache 2.0',
+      packages=['arvados'],
+      scripts=[
+        'bin/arv-get',
+        'bin/arv-put',
+        'bin/arv-ls',
+        'bin/arv-normalize',
+        ],
+      install_requires=[
+        'python-gflags',
+        'google-api-python-client',
+        'httplib2',
+        'urllib3',
+        'ws4py'
+        ],
+      zip_safe=False)
index 9b82f4efe593debecf9ae4ef28c36d3ac299372f..fbb8c52337dcbe9fd22ddcdab3e1f3af3d7bf086 100644 (file)
@@ -1,8 +1,3 @@
-from setuptools import setup
-import subprocess
-
-minor_version = '%%MINOR_VERSION%%'
-
 setup(name='arvados-python-client',
       version='0.1.' + minor_version,
       description='Arvados client library',
@@ -15,7 +10,6 @@ setup(name='arvados-python-client',
       scripts=[
         'bin/arv-get',
         'bin/arv-put',
-        'bin/arv-mount',
         'bin/arv-ls',
         'bin/arv-normalize',
         ],
@@ -24,6 +18,6 @@ setup(name='arvados-python-client',
         'google-api-python-client',
         'httplib2',
         'urllib3',
-       'llfuse'
+        'ws4py'
         ],
       zip_safe=False)
diff --git a/sdk/python/setup_fuse.py b/sdk/python/setup_fuse.py
new file mode 100644 (file)
index 0000000..28aa89f
--- /dev/null
@@ -0,0 +1,21 @@
+from setuptools import setup
+import setup_header
+
+setup(name='arvados-fuse-driver',
+      version='0.1.' + setup_header.minor_version,
+      description='Arvados FUSE driver',
+      author='Arvados',
+      author_email='info@arvados.org',
+      url="https://arvados.org",
+      download_url="https://github.com/curoverse/arvados.git",
+      license='Apache 2.0',
+      packages=['arvados.fuse'],
+      scripts=[
+        'bin/arv-mount'
+        ],
+      install_requires=[
+        'arvados-python-client',
+        'llfuse',
+        'python-daemon'
+        ],
+      zip_safe=False)
diff --git a/sdk/python/setup_header.py b/sdk/python/setup_header.py
new file mode 100644 (file)
index 0000000..c97f94d
--- /dev/null
@@ -0,0 +1,11 @@
+import shutil
+import os
+import sys
+
+with os.popen("git log --format=format:%ct.%h -n1 .") as m:
+    minor_version = m.read()
+
+# setup.py and setup_fuse.py share the same build/ directory, so delete it
+# before each build to avoid packaging files left over from the other package.
+build_dir = os.path.join(os.path.dirname(sys.argv[0]), 'build')
+shutil.rmtree(build_dir, ignore_errors=True)
index ce615984ed326d56f9c6b7b4143742cd7215b827..a462799899b83ea878f7e59a548cc75f2a67c29c 100644 (file)
@@ -9,11 +9,27 @@ import tempfile
 import shutil
 import subprocess
 import glob
+import run_test_server
+import json
 
-class FuseMountTest(unittest.TestCase):
+class MountTestBase(unittest.TestCase):
     def setUp(self):
         self.keeptmp = tempfile.mkdtemp()
         os.environ['KEEP_LOCAL_STORE'] = self.keeptmp
+        self.mounttmp = tempfile.mkdtemp()
+
+    def tearDown(self):
+        # llfuse.close is buggy, so use fusermount instead.
+        #llfuse.close(unmount=True)
+        subprocess.call(["fusermount", "-u", self.mounttmp])
+
+        os.rmdir(self.mounttmp)
+        shutil.rmtree(self.keeptmp)
+
+
+class FuseMountTest(MountTestBase):
+    def setUp(self):
+        super(FuseMountTest, self).setUp()
 
         cw = arvados.CollectionWriter()
 
@@ -46,10 +62,7 @@ class FuseMountTest(unittest.TestCase):
     def runTest(self):
         # Create the request handler
         operations = fuse.Operations(os.getuid(), os.getgid())
-        e = operations.inodes.add_entry(fuse.Directory(llfuse.ROOT_INODE))
-        operations.inodes.load_collection(e, arvados.CollectionReader(arvados.Keep.get(self.testcollection)))
-
-        self.mounttmp = tempfile.mkdtemp()
+        e = operations.inodes.add_entry(fuse.CollectionDirectory(llfuse.ROOT_INODE, operations.inodes, self.testcollection))
 
         llfuse.init(operations, self.mounttmp, [])
         t = threading.Thread(None, lambda: llfuse.main())
@@ -61,46 +74,37 @@ class FuseMountTest(unittest.TestCase):
         # now check some stuff
         d1 = os.listdir(self.mounttmp)
         d1.sort()
-        self.assertEqual(d1, ['dir1', 'dir2', 'thing1.txt', 'thing2.txt'])
+        self.assertEqual(['dir1', 'dir2', 'thing1.txt', 'thing2.txt'], d1)
 
         d2 = os.listdir(os.path.join(self.mounttmp, 'dir1'))
         d2.sort()
-        self.assertEqual(d2, ['thing3.txt', 'thing4.txt'])
+        self.assertEqual(['thing3.txt', 'thing4.txt'], d2)
 
         d3 = os.listdir(os.path.join(self.mounttmp, 'dir2'))
         d3.sort()
-        self.assertEqual(d3, ['dir3', 'thing5.txt', 'thing6.txt'])
+        self.assertEqual(['dir3', 'thing5.txt', 'thing6.txt'], d3)
 
         d4 = os.listdir(os.path.join(self.mounttmp, 'dir2/dir3'))
         d4.sort()
-        self.assertEqual(d4, ['thing7.txt', 'thing8.txt'])
-        
+        self.assertEqual(['thing7.txt', 'thing8.txt'], d4)
+
         files = {'thing1.txt': 'data 1',
                  'thing2.txt': 'data 2',
                  'dir1/thing3.txt': 'data 3',
                  'dir1/thing4.txt': 'data 4',
                  'dir2/thing5.txt': 'data 5',
-                 'dir2/thing6.txt': 'data 6',         
+                 'dir2/thing6.txt': 'data 6',
                  'dir2/dir3/thing7.txt': 'data 7',
                  'dir2/dir3/thing8.txt': 'data 8'}
 
         for k, v in files.items():
             with open(os.path.join(self.mounttmp, k)) as f:
-                self.assertEqual(f.read(), v)
-        
-
-    def tearDown(self):
-        # llfuse.close is buggy, so use fusermount instead.
-        #llfuse.close(unmount=True)
-        subprocess.call(["fusermount", "-u", self.mounttmp])
+                self.assertEqual(v, f.read())
 
-        os.rmdir(self.mounttmp)
-        shutil.rmtree(self.keeptmp)
 
-class FuseMagicTest(unittest.TestCase):
+class FuseMagicTest(MountTestBase):
     def setUp(self):
-        self.keeptmp = tempfile.mkdtemp()
-        os.environ['KEEP_LOCAL_STORE'] = self.keeptmp
+        super(FuseMagicTest, self).setUp()
 
         cw = arvados.CollectionWriter()
 
@@ -126,28 +130,202 @@ class FuseMagicTest(unittest.TestCase):
         # now check some stuff
         d1 = os.listdir(self.mounttmp)
         d1.sort()
-        self.assertEqual(d1, [])
+        self.assertEqual([], d1)
 
         d2 = os.listdir(os.path.join(self.mounttmp, self.testcollection))
         d2.sort()
-        self.assertEqual(d2, ['thing1.txt'])
+        self.assertEqual(['thing1.txt'], d2)
 
         d3 = os.listdir(self.mounttmp)
         d3.sort()
-        self.assertEqual(d3, [self.testcollection])
-        
+        self.assertEqual([self.testcollection], d3)
+
         files = {}
         files[os.path.join(self.mounttmp, self.testcollection, 'thing1.txt')] = 'data 1'
 
         for k, v in files.items():
             with open(os.path.join(self.mounttmp, k)) as f:
-                self.assertEqual(f.read(), v)
-        
+                self.assertEqual(v, f.read())
+
+
+class FuseTagsTest(MountTestBase):
+    def setUp(self):
+        super(FuseTagsTest, self).setUp()
+
+        cw = arvados.CollectionWriter()
+
+        cw.start_new_file('foo')
+        cw.write("foo")
+
+        self.testcollection = cw.finish()
+
+        run_test_server.run()
+
+    def runTest(self):
+        run_test_server.authorize_with("admin")
+        api = arvados.api('v1')
+
+        operations = fuse.Operations(os.getuid(), os.getgid())
+        e = operations.inodes.add_entry(fuse.TagsDirectory(llfuse.ROOT_INODE, operations.inodes, api))
+
+        llfuse.init(operations, self.mounttmp, [])
+        t = threading.Thread(None, lambda: llfuse.main())
+        t.start()
+
+        # wait until the driver is finished initializing
+        operations.initlock.wait()
+
+        d1 = os.listdir(self.mounttmp)
+        d1.sort()
+        self.assertEqual(['foo_tag'], d1)
+
+        d2 = os.listdir(os.path.join(self.mounttmp, 'foo_tag'))
+        d2.sort()
+        self.assertEqual(['1f4b0bc7583c2a7f9102c395f4ffc5e3+45'], d2)
+
+        d3 = os.listdir(os.path.join(self.mounttmp, 'foo_tag', '1f4b0bc7583c2a7f9102c395f4ffc5e3+45'))
+        d3.sort()
+        self.assertEqual(['foo'], d3)
+
+        files = {}
+        files[os.path.join(self.mounttmp, 'foo_tag', '1f4b0bc7583c2a7f9102c395f4ffc5e3+45', 'foo')] = 'foo'
+
+        for k, v in files.items():
+            with open(os.path.join(self.mounttmp, k)) as f:
+                self.assertEqual(v, f.read())
+
 
     def tearDown(self):
-        # llfuse.close is buggy, so use fusermount instead.
-        #llfuse.close(unmount=True)
-        subprocess.call(["fusermount", "-u", self.mounttmp])
+        run_test_server.stop()
 
-        os.rmdir(self.mounttmp)
-        shutil.rmtree(self.keeptmp)
+        super(FuseTagsTest, self).tearDown()
+
+class FuseTagsUpdateTestBase(MountTestBase):
+
+    def runRealTest(self):
+        run_test_server.authorize_with("admin")
+        api = arvados.api('v1')
+
+        operations = fuse.Operations(os.getuid(), os.getgid())
+        e = operations.inodes.add_entry(fuse.TagsDirectory(llfuse.ROOT_INODE, operations.inodes, api, poll_time=1))
+
+        llfuse.init(operations, self.mounttmp, [])
+        t = threading.Thread(None, lambda: llfuse.main())
+        t.start()
+
+        # wait until the driver is finished initializing
+        operations.initlock.wait()
+
+        d1 = os.listdir(self.mounttmp)
+        d1.sort()
+        self.assertEqual(['foo_tag'], d1)
+
+        api.links().create(body={'link': {
+            'head_uuid': 'fa7aeb5140e2848d39b416daeef4ffc5+45',
+            'link_class': 'tag',
+            'name': 'bar_tag'
+        }}).execute()
+
+        time.sleep(1)
+
+        d2 = os.listdir(self.mounttmp)
+        d2.sort()
+        self.assertEqual(['bar_tag', 'foo_tag'], d2)
+
+        d3 = os.listdir(os.path.join(self.mounttmp, 'bar_tag'))
+        d3.sort()
+        self.assertEqual(['fa7aeb5140e2848d39b416daeef4ffc5+45'], d3)
+
+        l = api.links().create(body={'link': {
+            'head_uuid': 'ea10d51bcf88862dbcc36eb292017dfd+45',
+            'link_class': 'tag',
+            'name': 'bar_tag'
+        }}).execute()
+
+        time.sleep(1)
+
+        d4 = os.listdir(os.path.join(self.mounttmp, 'bar_tag'))
+        d4.sort()
+        self.assertEqual(['ea10d51bcf88862dbcc36eb292017dfd+45', 'fa7aeb5140e2848d39b416daeef4ffc5+45'], d4)
+
+        api.links().delete(uuid=l['uuid']).execute()
+
+        time.sleep(1)
+
+        d5 = os.listdir(os.path.join(self.mounttmp, 'bar_tag'))
+        d5.sort()
+        self.assertEqual(['fa7aeb5140e2848d39b416daeef4ffc5+45'], d5)
+
+
+class FuseTagsUpdateTestWebsockets(FuseTagsUpdateTestBase):
+    def setUp(self):
+        super(FuseTagsUpdateTestWebsockets, self).setUp()
+        run_test_server.run(True)
+
+    def runTest(self):
+        self.runRealTest()
+
+    def tearDown(self):
+        run_test_server.stop(True)
+        super(FuseTagsUpdateTestWebsockets, self).tearDown()
+
+
+class FuseTagsUpdateTestPoll(FuseTagsUpdateTestBase):
+    def setUp(self):
+        super(FuseTagsUpdateTestPoll, self).setUp()
+        run_test_server.run(False)
+
+    def runTest(self):
+        self.runRealTest()
+
+    def tearDown(self):
+        run_test_server.stop(False)
+        super(FuseTagsUpdateTestPoll, self).tearDown()
+
+
+class FuseGroupsTest(MountTestBase):
+    def setUp(self):
+        super(FuseGroupsTest, self).setUp()
+        run_test_server.run()
+
+    def runTest(self):
+        run_test_server.authorize_with("admin")
+        api = arvados.api('v1')
+
+        operations = fuse.Operations(os.getuid(), os.getgid())
+        e = operations.inodes.add_entry(fuse.GroupsDirectory(llfuse.ROOT_INODE, operations.inodes, api))
+
+        llfuse.init(operations, self.mounttmp, [])
+        t = threading.Thread(None, lambda: llfuse.main())
+        t.start()
+
+        # wait until the driver is finished initializing
+        operations.initlock.wait()
+
+        d1 = os.listdir(self.mounttmp)
+        d1.sort()
+        self.assertIn('zzzzz-j7d0g-v955i6s2oi1cbso', d1)
+
+        d2 = os.listdir(os.path.join(self.mounttmp, 'zzzzz-j7d0g-v955i6s2oi1cbso'))
+        d2.sort()
+        self.assertEqual(["I'm a job in a folder",
+                          "I'm a template in a folder",
+                          "zzzzz-j58dm-5gid26432uujf79",
+                          "zzzzz-j58dm-7r18rnd5nzhg5yk",
+                          "zzzzz-j7d0g-axqo7eu9pwvna1x"
+                      ], d2)
+
+        d3 = os.listdir(os.path.join(self.mounttmp, 'zzzzz-j7d0g-v955i6s2oi1cbso', 'zzzzz-j7d0g-axqo7eu9pwvna1x'))
+        d3.sort()
+        self.assertEqual(["I'm in a subfolder, too",
+                          "zzzzz-j58dm-c40lddwcqqr1ffs",
+                          "zzzzz-o0j2j-ryhm1bn83ni03sn"
+                      ], d3)
+
+        with open(os.path.join(self.mounttmp, 'zzzzz-j7d0g-v955i6s2oi1cbso', "I'm a template in a folder")) as f:
+            j = json.load(f)
+            self.assertEqual("Two Part Pipeline Template", j['name'])
+
+    def tearDown(self):
+        run_test_server.stop()
+        super(FuseGroupsTest, self).tearDown()
diff --git a/sdk/python/test_websockets.py b/sdk/python/test_websockets.py
new file mode 100644 (file)
index 0000000..b9f6502
--- /dev/null
@@ -0,0 +1,32 @@
+import run_test_server
+import unittest
+import arvados
+import arvados.events
+import time
+
+class WebsocketTest(unittest.TestCase):
+    def setUp(self):
+        run_test_server.run(True)
+
+    def on_event(self, ev):
+        if self.state == 1:
+            self.assertEqual(200, ev['status'])
+            self.state = 2
+        elif self.state == 2:
+            self.assertEqual(self.h[u'uuid'], ev[u'object_uuid'])
+            self.state = 3
+        elif self.state == 3:
+            self.fail()
+
+    def runTest(self):
+        self.state = 1
+
+        run_test_server.authorize_with("admin")
+        api = arvados.api('v1')
+        arvados.events.subscribe(api, [['object_uuid', 'is_a', 'arvados#human']], lambda ev: self.on_event(ev))
+        time.sleep(1)
+        self.h = api.humans().create(body={}).execute()
+        time.sleep(1)
+
+    def tearDown(self):
+        run_test_server.stop(True)
index 3ac614d1804d0fa69c2265dc38bc13dc14671e88..5d907b89ac45a0fb6e6d1ddd66c86d7f507eb176 100644 (file)
@@ -20,7 +20,7 @@ class Arvados::V1::SchemaController < ApplicationController
         description: "The API to interact with Arvados.",
         documentationLink: "http://doc.arvados.org/api/index.html",
         protocol: "rest",
-        baseUrl: root_url + "/arvados/v1/",
+        baseUrl: root_url + "arvados/v1/",
         basePath: "/arvados/v1/",
         rootUrl: root_url,
         servicePath: "arvados/v1/",
@@ -73,7 +73,7 @@ class Arvados::V1::SchemaController < ApplicationController
       if Rails.application.config.websocket_address
         discovery[:websocketUrl] = Rails.application.config.websocket_address
       elsif ENV['ARVADOS_WEBSOCKETS']
-        discovery[:websocketUrl] = (root_url.sub /^http/, 'ws') + "/websocket"
+        discovery[:websocketUrl] = (root_url.sub /^http/, 'ws') + "websocket"
       end
 
       ActiveRecord::Base.descendants.reject(&:abstract_class?).each do |k|
index d7e556b197323e60ee6d8d95ecce56c947c9a3a3..d3727d30f3d661f208f089bc52f21ce7219421b6 100644 (file)
@@ -8,7 +8,7 @@
 module RecordFilters
 
   # Input:
-  # +filters+  Arvados filters as list of lists.
+  # +filters+        array of conditions, each being [column, operator, operand]
   # +ar_table_name+  name of SQL table
   #
   # Output:
@@ -29,8 +29,11 @@ module RecordFilters
         raise ArgumentError.new("Invalid attribute '#{attr}' in filter")
       end
       case operator.downcase
-      when '=', '<', '<=', '>', '>=', 'like'
+      when '=', '<', '<=', '>', '>=', '!=', 'like'
         if operand.is_a? String
+          if operator == '!='
+            operator = '<>'
+          end
           cond_out << "#{ar_table_name}.#{attr} #{operator} ?"
           if (# any operator that operates on value rather than
               # representation:
@@ -41,13 +44,15 @@ module RecordFilters
           param_out << operand
         elsif operand.nil? and operator == '='
           cond_out << "#{ar_table_name}.#{attr} is null"
+        elsif operand.nil? and operator == '!='
+          cond_out << "#{ar_table_name}.#{attr} is not null"
         else
           raise ArgumentError.new("Invalid operand type '#{operand.class}' "\
                                   "for '#{operator}' operator in filters")
         end
-      when 'in'
+      when 'in', 'not in'
         if operand.is_a? Array
-          cond_out << "#{ar_table_name}.#{attr} IN (?)"
+          cond_out << "#{ar_table_name}.#{attr} #{operator} (?)"
           param_out << operand
         else
           raise ArgumentError.new("Invalid operand type '#{operand.class}' "\
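
To illustrate the new operators, a rough sketch of the filter lists an API client might send, written here with the Python SDK; the excluded UUIDs are placeholders. A '!=' filter with a null operand translates to "IS NOT NULL", matching the controller tests added below.

    import arvados

    api = arvados.api('v1')

    # Jobs whose uuid is outside the given set and whose output has been set.
    jobs = api.jobs().list(filters=[
        ['uuid', 'not in', ['zzzzz-8i9sb-pshmckwoma9plh7', 'zzzzz-8i9sb-placeholder0000']],
        ['output', '!=', None],
    ]).execute()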
diff --git a/services/api/script/import_commits.rb b/services/api/script/import_commits.rb
deleted file mode 100755 (executable)
index 80c5748..0000000
+++ /dev/null
@@ -1,9 +0,0 @@
-#!/usr/bin/env ruby
-
-ENV["RAILS_ENV"] = ARGV[0] || ENV["RAILS_ENV"] || "development"
-
-require File.dirname(__FILE__) + '/../config/boot'
-require File.dirname(__FILE__) + '/../config/environment'
-require 'shellwords'
-
-Commit.import_all
index af8f72902b69e265d33c0624381d5f9377340a37..0d1f71f621c2b7dc6f483c8e38ab84ddc88963aa 100644 (file)
@@ -177,6 +177,40 @@ class Arvados::V1::JobsControllerTest < ActionController::TestCase
                               'zzzzz-8i9sb-pshmckwoma9plh7']
   end
 
+  test "search jobs by uuid with 'not in' query" do
+    exclude_uuids = [jobs(:running).uuid,
+                     jobs(:running_cancelled).uuid]
+    authorize_with :active
+    get :index, {
+      filters: [['uuid', 'not in', exclude_uuids]]
+    }
+    assert_response :success
+    found = assigns(:objects).collect(&:uuid)
+    assert_not_empty found, "'not in' query returned nothing"
+    assert_empty(found & exclude_uuids,
+                 "'not in' query returned uuids I asked not to get")
+  end
+
+  ['=', '!='].each do |operator|
+    [['uuid', 'zzzzz-8i9sb-pshmckwoma9plh7'],
+     ['output', nil]].each do |attr, operand|
+      test "search jobs with #{attr} #{operator} #{operand.inspect} query" do
+        authorize_with :active
+        get :index, {
+          filters: [[attr, operator, operand]]
+        }
+        assert_response :success
+        values = assigns(:objects).collect { |x| x.send(attr) }
+        assert_not_empty values, "query should return non-empty result"
+        if operator == '='
+          assert_empty values - [operand], "query results do not satisfy query"
+        else
+          assert_empty values & [operand], "query results do not satisfy query"
+        end
+      end
+    end
+  end
+
   test "search jobs by started_at with < query" do
     authorize_with :active
     get :index, {