Merge branch '12032-project-trash' refs #12032
author    Peter Amstutz <peter.amstutz@curoverse.com>
          Mon, 25 Sep 2017 19:39:50 +0000 (15:39 -0400)
committer Peter Amstutz <peter.amstutz@curoverse.com>
          Mon, 25 Sep 2017 19:39:54 +0000 (15:39 -0400)
Arvados-DCO-1.1-Signed-off-by: Peter Amstutz <pamstutz@veritasgenetics.com>

26 files changed:
apps/workbench/app/views/projects/_show_dashboard.html.erb
apps/workbench/app/views/trash_items/_show_recent_trash.html.erb
apps/workbench/app/views/trash_items/_show_trash_rows.html.erb
apps/workbench/test/integration/trash_test.rb
build/run-library.sh
sdk/cwl/arvados_cwl/arvdocker.py
sdk/cwl/arvados_cwl/arvjob.py
sdk/cwl/test_with_arvbox.sh
sdk/go/keepclient/hashcheck.go
sdk/go/keepclient/hashcheck_test.go
services/api/app/models/commit.rb
services/api/app/models/container.rb
services/api/lib/crunch_dispatch.rb
services/api/test/helpers/git_test_helper.rb
services/api/test/unit/commit_test.rb
services/api/test/unit/container_test.rb
services/fuse/arvados_fuse/__init__.py
services/fuse/arvados_fuse/fresh.py
services/fuse/arvados_fuse/fusedir.py
services/fuse/arvados_fuse/fusefile.py
tools/arvbox/bin/arvbox
tools/arvbox/lib/arvbox/docker/Dockerfile.base
tools/arvbox/lib/arvbox/docker/common.sh
tools/arvbox/lib/arvbox/docker/createusers.sh
tools/arvbox/lib/arvbox/docker/service/postgres/run-service
tools/arvbox/lib/arvbox/docker/service/sdk/run-service

index 3be8e374a2977b16ca76b53f3e7de645c4751ea9..00780c4089d00b3bbb5332e115e5de5d88f2f922 100644 (file)
@@ -11,7 +11,7 @@ SPDX-License-Identifier: AGPL-3.0 %>
   preload_objects_for_dataclass(Container, recent_cr_containers) if recent_cr_containers.andand.any?
 
   # fetch children of all the active crs in one call, if there are any
-  active_crs = recent_crs.each {|cr| cr if (cr.priority > 0 and cr.state != 'Final' and cr.container_uuid)}
+  active_crs = recent_crs.each {|cr| cr if (cr.priority.andand > 0 and cr.state != 'Final' and cr.container_uuid)}
   active_cr_uuids = active_crs.map(&:uuid)
   active_cr_containers = active_crs.map {|cr| cr.container_uuid}.compact.uniq
   cr_children = {}
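
The switch to priority.andand > 0 guards against container requests whose priority is nil. A minimal sketch of the difference, assuming the andand gem already used by Workbench:

    nil.andand > 0    #=> nil (falsy) -- no NoMethodError when priority is unset
    5.andand > 0      #=> true
    0.andand > 0      #=> false
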
index a78b02fcb10c202876add3e9b7dddecad8fecbcd..c534e5256d1a70b0b2ba1cb2fb44de884e657142 100644 (file)
@@ -29,12 +29,12 @@ SPDX-License-Identifier: AGPL-3.0 %>
   <div>
     <table id="trash-index" class="topalign table table-condensed table-fixedlayout">
       <colgroup>
-        <col width="5%" />
+        <col width="2%" />
         <col width="20%" />
+        <col width="13%" />
         <col width="15%" />
-        <col width="15%" />
-        <col width="10%" />
-        <col width="30%" />
+        <col width="20%" />
+        <col width="25%" />
         <col width="5%" />
       </colgroup>
 
@@ -42,9 +42,9 @@ SPDX-License-Identifier: AGPL-3.0 %>
         <tr class="contain-align-left">
           <th></th>
           <th>Name</th>
-          <th>Trashed at</th>
-          <th title="After this time, no longer available to be recovered from Trash">Permanently<br/>Deleted At</th>
+          <th>Date&nbsp;trashed&nbsp;/<br />to&nbsp;be&nbsp;deleted</th>
           <th>Owner</th>
+          <th>UUID&nbsp;/<br />Content&nbsp;address&nbsp;(PDH)</th>
           <th>Contents</th>
           <th></th>
         </tr>
index 6fc1676b85dbcb378d300f5efffb67c2edde98a3..a7e53e28bd4940d8884fbd81fa477680212d95b9 100644 (file)
@@ -11,14 +11,22 @@ SPDX-License-Identifier: AGPL-3.0 %>
       </td>
       <td>
         <%= if !obj.name.blank? then obj.name else obj.uuid end %>
+      </td>
       <td>
-        <%= render_localized_date(obj.trash_at) if obj.trash_at %>
-      <td>
-        <%= render_localized_date(obj.delete_at) if obj.delete_at %>
+        <% if obj.trash_at %>
+          <%= render_localized_date(obj.trash_at)  %>
+        <% end %>
+        <br />
+        <% if obj.delete_at %>
+          <%= render_localized_date(obj.delete_at) %>
+        <% end %>
       </td>
       <td>
         <%= link_to_if_arvados_object obj.owner_uuid, friendly_name: true %>
       </td>
+      <td>
+        <%= obj.uuid %><br /><%= obj.portable_data_hash %>
+      </td>
       <td>
         <% for i in (0..[2, obj.files.length-1].min) %>
           <% file = obj.files[i] %>
index a8c26997876a15f057d16bd9b96b8391bb6d3d89..5c6987ec8c35651000d3b475497265de3dcfe008 100644 (file)
@@ -18,6 +18,8 @@ class TrashTest < ActionDispatch::IntegrationTest
     visit page_with_token('active', "/trash")
 
     assert_text deleted['name']
+    assert_text deleted['uuid']
+    assert_text deleted['portable_data_hash']
     assert_text expired1['name']
     assert_no_text expired2['name']   # not readable by this user
     assert_no_text 'foo_file'         # not trash
@@ -60,12 +62,21 @@ class TrashTest < ActionDispatch::IntegrationTest
     visit page_with_token('active', "/trash")
 
     assert_text deleted['name']
+    assert_text deleted['uuid']
+    assert_text deleted['portable_data_hash']
     assert_text expired['name']
 
     page.find_field('Search trash').set 'expired'
 
-    assert_text expired['name']
     assert_no_text deleted['name']
+    assert_text expired['name']
+
+    page.find_field('Search trash').set deleted['portable_data_hash'][0..9]
+
+    assert_no_text expired['name']
+    assert_text deleted['name']
+    assert_text deleted['uuid']
+    assert_text deleted['portable_data_hash']
 
     click_button 'Selection...'
     within('.selection-action-container') do
index cf7755b68de780631cee4319ea720160146ffdff..5fc494cdf5aad3608cd1f7b7eafb2c5bd19035d0 100755 (executable)
@@ -365,6 +365,15 @@ fpm_build () {
       COMMAND_ARR+=(--deb-ignore-iteration-in-dependencies)
   fi
 
+  # 12271 - As FPM-generated packages don't include scripts by default,
+  # package cleanup on upgrade depends on the files being listed in the %files
+  # section of the generated SPEC files. To remove DIRECTORIES, they need to
+  # be listed in that section too, so we add this parameter to properly
+  # remove lingering dirs.
+  if [[ rpm = "$FORMAT" ]]; then
+    COMMAND_ARR+=('--rpm-auto-add-directories')
+  fi
+
   if [[ "${DEBUG:-0}" != "0" ]]; then
     COMMAND_ARR+=('--verbose' '--log' 'info')
   fi
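
A sketch of the flag's effect on a hypothetical package (name and paths are illustrative): fpm copies a directory tree into the RPM, and --rpm-auto-add-directories makes the generated SPEC list the directories themselves in %files so they are cleaned up on erase or upgrade.

    fpm -s dir -t rpm -n example -v 1.0 --rpm-auto-add-directories /usr/share/example
    # %files now includes /usr/share/example itself, so "rpm -e example"
    # removes the directory instead of leaving it behind
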
index 6b736a5a7d872ff60eae3bdeffc1e55c66de40c0..0513ca02ec68ca2d0fe5f58ced9ab1c98c124844 100644 (file)
@@ -44,7 +44,10 @@ def arv_docker_get_image(api_client, dockerRequirement, pull_image, project_uuid
 
         if not images:
             # Fetch Docker image if necessary.
-            cwltool.docker.get_image(dockerRequirement, pull_image)
+            try:
+                cwltool.docker.get_image(dockerRequirement, pull_image)
+            except OSError as e:
+                raise WorkflowException("While trying to get Docker image '%s', failed to execute 'docker': %s" % (dockerRequirement["dockerImageId"], e))
 
             # Upload image to Arvados
             args = []
index d104d56e91e7453b5605a0222e17818184a42cb2..794c99446c7bdf5ce442d714816d93f150a623ce 100644 (file)
@@ -32,7 +32,7 @@ from ._version import __version__
 logger = logging.getLogger('arvados.cwl-runner')
 metrics = logging.getLogger('arvados.cwl-runner.metrics')
 
-crunchrunner_re = re.compile(r"^\S+ \S+ \d+ \d+ stderr \S+ \S+ crunchrunner: \$\(task\.(tmpdir|outdir|keep)\)=(.*)")
+crunchrunner_re = re.compile(r"^.*crunchrunner: \$\(task\.(tmpdir|outdir|keep)\)=(.*)$")
 
 crunchrunner_git_commit = 'a3f2cb186e437bfce0031b024b2157b73ed2717d'
 
@@ -222,12 +222,13 @@ class ArvadosJob(object):
                                                                    keep_client=self.arvrunner.keep_client,
                                                                    num_retries=self.arvrunner.num_retries)
                         log = logc.open(logc.keys()[0])
-                        dirs = {}
-                        tmpdir = None
-                        outdir = None
-                        keepdir = None
+                        dirs = {
+                            "tmpdir": "/tmpdir",
+                            "outdir": "/outdir",
+                            "keep": "/keep"
+                        }
                         for l in log:
-                            # Determine the tmpdir, outdir and keepdir paths from
+                            # Determine the tmpdir, outdir and keep paths from
                             # the job run.  Unfortunately, we can't take the first
                             # values we find (which are expected to be near the
                             # top) and stop scanning because if the node fails and
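
A quick check of what the loosened pattern matches, using a hypothetical log line: the old regex demanded a fixed field layout ahead of the "crunchrunner:" marker, while the new one anchors only on the marker itself.

    import re
    crunchrunner_re = re.compile(r"^.*crunchrunner: \$\(task\.(tmpdir|outdir|keep)\)=(.*)$")
    line = "2017-09-25_19:39:50 zzzzz-8i9sb-0123456789abcde 1234 0 stderr crunchrunner: $(task.tmpdir)=/tmp/crunch-job/work"
    m = crunchrunner_re.match(line)
    assert m.group(1) == "tmpdir" and m.group(2) == "/tmp/crunch-job/work"
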
index 236658c1f1e7c0a18b7675078374f33b5e9e7db5..53a0f947b9d1cea00f366e27620cf03bcc1c1769 100755 (executable)
@@ -84,8 +84,8 @@ if test "$tag" = "latest" ; then
   arv-keepdocker --pull arvados/jobs $tag
 else
   jobsimg=\$(curl http://versions.arvados.org/v1/commit/$tag | python -c "import json; import sys; sys.stdout.write(json.load(sys.stdin)['Versions']['Docker']['arvados/jobs'])")
-  arv-keepdocker --pull arvados/jobs $jobsimg
-  docker tag -f arvados/jobs:$jobsimg arvados/jobs:latest
+  arv-keepdocker --pull arvados/jobs \$jobsimg
+  docker tag arvados/jobs:\$jobsimg arvados/jobs:latest
   arv-keepdocker arvados/jobs latest
 fi
 
index 4a5c09ba793cdfb9d37609d6b9f4392d7f1c18db..726b81362ca6d4e9b1db43f6f8677111039911b3 100644 (file)
@@ -35,7 +35,7 @@ func (this HashCheckingReader) Read(p []byte) (n int, err error) {
                this.Hash.Write(p[:n])
        }
        if err == io.EOF {
-               sum := this.Hash.Sum(make([]byte, 0, this.Hash.Size()))
+               sum := this.Hash.Sum(nil)
                if fmt.Sprintf("%x", sum) != this.Check {
                        err = BadChecksum
                }
@@ -43,8 +43,9 @@ func (this HashCheckingReader) Read(p []byte) (n int, err error) {
        return n, err
 }
 
-// WriteTo writes the entire contents of this.Reader to dest.  Returns
-// BadChecksum if the checksum doesn't match.
+// WriteTo writes the entire contents of this.Reader to dest. Returns
+// BadChecksum if writing is successful but the checksum doesn't
+// match.
 func (this HashCheckingReader) WriteTo(dest io.Writer) (written int64, err error) {
        if writeto, ok := this.Reader.(io.WriterTo); ok {
                written, err = writeto.WriteTo(io.MultiWriter(dest, this.Hash))
@@ -52,13 +53,16 @@ func (this HashCheckingReader) WriteTo(dest io.Writer) (written int64, err error
                written, err = io.Copy(io.MultiWriter(dest, this.Hash), this.Reader)
        }
 
-       sum := this.Hash.Sum(make([]byte, 0, this.Hash.Size()))
+       if err != nil {
+               return written, err
+       }
 
+       sum := this.Hash.Sum(nil)
        if fmt.Sprintf("%x", sum) != this.Check {
-               err = BadChecksum
+               return written, BadChecksum
        }
 
-       return written, err
+       return written, nil
 }
 
 // Close reads all remaining data from the underlying Reader and
@@ -77,7 +81,7 @@ func (this HashCheckingReader) Close() (err error) {
                return err
        }
 
-       sum := this.Hash.Sum(make([]byte, 0, this.Hash.Size()))
+       sum := this.Hash.Sum(nil)
        if fmt.Sprintf("%x", sum) != this.Check {
                err = BadChecksum
        }
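
A usage sketch of the revised WriteTo contract (variable names are illustrative; the struct fields match the test below): callers can now tell a failed write apart from corrupt data, since BadChecksum is only returned when the copy itself succeeded.

    hcr := keepclient.HashCheckingReader{Reader: resp.Body, Hash: md5.New(), Check: expectedHex}
    if _, err := hcr.WriteTo(dst); err == keepclient.BadChecksum {
            // all bytes were written, but they don't match the expected hash
    } else if err != nil {
            // the write failed partway; the checksum was never evaluated
    }
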
index db748ee98ed2b247d37f21b931b03283f5e70bb1..44345afda61c5535ee1bfa1232c3369827f00be5 100644 (file)
@@ -8,9 +8,10 @@ import (
        "bytes"
        "crypto/md5"
        "fmt"
-       . "gopkg.in/check.v1"
        "io"
        "io/ioutil"
+
+       . "gopkg.in/check.v1"
 )
 
 type HashcheckSuiteSuite struct{}
@@ -86,4 +87,17 @@ func (h *HashcheckSuiteSuite) TestWriteTo(c *C) {
                c.Check(err, Equals, BadChecksum)
                <-done
        }
+
+       // If WriteTo stops early due to a write error, return the
+       // write error (not "bad checksum").
+       {
+               input := bytes.NewBuffer(make([]byte, 1<<26))
+               hcr := HashCheckingReader{input, md5.New(), hash}
+               r, w := io.Pipe()
+               r.Close()
+               n, err := hcr.WriteTo(w)
+               c.Check(n, Equals, int64(0))
+               c.Check(err, NotNil)
+               c.Check(err, Not(Equals), BadChecksum)
+       }
 }
index 1d9a821d131a4721bf461ff613531748c38d95fb..b0efbc7cb0c2c84c5ed4fb705b1e7dc5e88b5138 100644 (file)
@@ -129,8 +129,8 @@ class Commit < ActiveRecord::Base
   end
 
   # Given a repository (url, or name of hosted repo) and commit sha1,
-  # copy the commit into the internal git repo and tag it with the
-  # given tag (typically a job UUID).
+  # copy the commit into the internal git repo (if necessary), and tag
+  # it with the given tag (typically a job UUID).
   #
   # The repo can be a remote url, but in this case sha1 must already
   # be present in our local cache for that repo: e.g., sha1 was just
@@ -147,11 +147,45 @@ class Commit < ActiveRecord::Base
       raise ArgumentError.new "no local repository for #{repo_name}"
     end
     dst_gitdir = Rails.configuration.git_internal_dir
-    must_pipe("echo #{sha1.shellescape}",
-              "git --git-dir #{src_gitdir.shellescape} pack-objects -q --revs --stdout",
-              "git --git-dir #{dst_gitdir.shellescape} unpack-objects -q")
-    must_git(dst_gitdir,
-             "tag --force #{tag.shellescape} #{sha1.shellescape}")
+
+    begin
+      commit_in_dst = must_git(dst_gitdir, "log -n1 --format=%H #{sha1.shellescape}^{commit}").strip
+    rescue GitError
+      commit_in_dst = false
+    end
+
+    tag_cmd = "tag --force #{tag.shellescape} #{sha1.shellescape}^{commit}"
+    if commit_in_dst == sha1
+      must_git(dst_gitdir, tag_cmd)
+    else
+      # git-fetch is faster than pack-objects|unpack-objects, but
+      # git-fetch can't fetch by sha1. So we first try to fetch a
+      # branch that has the desired commit, and if that fails (there
+      # is no such branch, or the branch we choose changes under us in
+      # a race), we fall back to pack|unpack.
+      begin
+        branches = must_git(src_gitdir,
+                            "branch --contains #{sha1.shellescape}")
+        m = branches.match(/^. (\w+)\n/)
+        if !m
+          raise GitError.new "commit is not on any branch"
+        end
+        branch = m[1]
+        must_git(dst_gitdir,
+                 "fetch file://#{src_gitdir.shellescape} #{branch.shellescape}")
+        # Even if all of the above steps succeeded, we might still not
+        # have the right commit due to a race, in which case tag_cmd
+        # will fail, and we'll need to fall back to pack|unpack. So
+        # don't be tempted to condense this tag_cmd and the one in the
+        # rescue block into a single attempt.
+        must_git(dst_gitdir, tag_cmd)
+      rescue GitError
+        must_pipe("echo #{sha1.shellescape}",
+                  "git --git-dir #{src_gitdir.shellescape} pack-objects -q --revs --stdout",
+                  "git --git-dir #{dst_gitdir.shellescape} unpack-objects -q")
+        must_git(dst_gitdir, tag_cmd)
+      end
+    end
   end
 
   protected
@@ -213,14 +247,16 @@ class Commit < ActiveRecord::Base
     # Clear token in case a git helper tries to use it as a password.
     orig_token = ENV['ARVADOS_API_TOKEN']
     ENV['ARVADOS_API_TOKEN'] = ''
+    last_output = ''
     begin
       git = "git --git-dir #{gitdir.shellescape}"
       cmds.each do |cmd|
-        must_pipe git+" "+cmd
+        last_output = must_pipe git+" "+cmd
       end
     ensure
       ENV['ARVADOS_API_TOKEN'] = orig_token
     end
+    return last_output
   end
 
   def self.must_pipe *cmds
@@ -229,5 +265,6 @@ class Commit < ActiveRecord::Base
     if not $?.success?
       raise GitError.new "#{cmd}: #{$?}: #{out}"
     end
+    return out
   end
 end
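
The two transfer strategies in shell terms (paths are hypothetical), mirroring the logic above: fetch by branch when one contains the commit, otherwise pipe pack-objects into unpack-objects for an unreferenced sha1.

    # fast path: some branch in the source repo contains the commit
    git --git-dir /internal.git fetch file:///repos/foo.git mybranch

    # fallback: copy the objects for a sha1 no branch points to
    echo $sha1 \
      | git --git-dir /repos/foo.git pack-objects -q --revs --stdout \
      | git --git-dir /internal.git unpack-objects -q
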
index 0aeca6db826db3fdf9dcb5c303af4c4d21d5ede4..83765fb1dc5571b4454ff8143a1545321f34c91c 100644 (file)
@@ -385,7 +385,7 @@ class Container < ArvadosModel
       when Running
         permitted.push :finished_at, :output, :log
       when Queued, Locked
-        permitted.push :finished_at
+        permitted.push :finished_at, :log
       end
 
     else
index 230f03e5777a56aec28b57bda56dcea4e82b065f..3cabc1e3ce75842d6e187a7f99ab6a12dd510d84 100644 (file)
@@ -429,8 +429,11 @@ class CrunchDispatch
         i, o, e, t = Open3.popen3(*cmd_args)
       rescue
         $stderr.puts "dispatch: popen3: #{$!}"
-        sleep 1
-        next
+        # This is a dispatch problem like "Too many open files";
+        # retrying another job right away would be futile. Just return
+        # and hope things are better next time, after (at least) a
+        # did_recently() delay.
+        return
       end
 
       $stderr.puts "dispatch: job #{job.uuid}"
@@ -633,31 +636,11 @@ class CrunchDispatch
     pid_done = nil
     j_done = nil
 
-    if false
-      begin
-        pid_done = waitpid(-1, Process::WNOHANG | Process::WUNTRACED)
-        if pid_done
-          j_done = @running.values.
-            select { |j| j[:wait_thr].pid == pid_done }.
-            first
-        end
-      rescue SystemCallError
-        # I have @running processes but system reports I have no
-        # children. This is likely to happen repeatedly if it happens at
-        # all; I will log this no more than once per child process I
-        # start.
-        if 0 < @running.select { |uuid,j| j[:warned_waitpid_error].nil? }.size
-          children = @running.values.collect { |j| j[:wait_thr].pid }.join ' '
-          $stderr.puts "dispatch: IPC bug: waitpid() error (#{$!}), but I have children #{children}"
-        end
-        @running.each do |uuid,j| j[:warned_waitpid_error] = true end
-      end
-    else
-      @running.each do |uuid, j|
-        if j[:wait_thr].status == false
-          pid_done = j[:wait_thr].pid
-          j_done = j
-        end
+    @running.each do |uuid, j|
+      if !j[:wait_thr].status
+        pid_done = j[:wait_thr].pid
+        j_done = j
+        break
       end
     end
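
The new loop leans on Ruby's Thread#status semantics: false means the thread terminated normally and nil means it died with an exception, so !status catches both terminal states, unlike the old == false test. A sketch:

    t = Thread.new { }; t.join
    t.status                     #=> false (exited normally)
    t = Thread.new { raise }; (t.join rescue nil)
    t.status                     #=> nil (died with an exception)
    Thread.new { sleep }.status  #=> "sleep" (still running, truthy)
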
 
index 19639b348a29de339dbeb56361d62ee721841cfd..673e0e248fd0e5e63fbe95a97f85a6d7a2cd3b79 100644 (file)
@@ -27,12 +27,13 @@ module GitTestHelper
       system("tar", "-xC", @tmpdir.to_s, "-f", "test/test.git.tar")
       Rails.configuration.git_repositories_dir = "#{@tmpdir}/test"
 
-      intdir = Rails.configuration.git_internal_dir
-      if not File.exist? intdir
-        FileUtils.mkdir_p intdir
-        IO.read("|git --git-dir #{intdir.to_s.shellescape} init")
-        assert $?.success?
-      end
+      # Initialize an empty internal git repo.
+      intdir =
+        Rails.configuration.git_internal_dir =
+        Rails.root.join(@tmpdir, 'internal.git').to_s
+      FileUtils.mkdir_p intdir
+      IO.read("|git --git-dir #{intdir.shellescape} init")
+      assert $?.success?
     end
 
     base.teardown do
index ec7a0b9a5b9eff5a7a625cf0ccecf5b100bc2951..af365b19e2e224b70946982cabe59c2c2fd8cb77 100644 (file)
@@ -26,6 +26,14 @@ class CommitTest < ActiveSupport::TestCase
     end
   end
 
+  def must_pipe(cmd)
+    begin
+      return IO.read("|#{cmd}")
+    ensure
+      assert $?.success?
+    end
+  end
+
   [
    'https://github.com/curoverse/arvados.git',
    'http://github.com/curoverse/arvados.git',
@@ -79,6 +87,47 @@ class CommitTest < ActiveSupport::TestCase
     assert $?.success?
   end
 
+  def with_foo_repository
+    Dir.chdir("#{Rails.configuration.git_repositories_dir}/#{repositories(:foo).uuid}") do
+      must_pipe("git checkout master 2>&1")
+      yield
+    end
+  end
+
+  test 'tag_in_internal_repository, new non-tip sha1 in local repo' do
+    tag = "tag#{rand(10**10)}"
+    sha1 = nil
+    with_foo_repository do
+      must_pipe("git checkout -b branch-#{rand(10**10)} 2>&1")
+      must_pipe("echo -n #{tag.shellescape} >bar")
+      must_pipe("git add bar")
+      must_pipe("git -c user.email=x@x -c user.name=X commit -m -")
+      sha1 = must_pipe("git log -n1 --format=%H").strip
+      must_pipe("git rm bar")
+      must_pipe("git -c user.email=x@x -c user.name=X commit -m -")
+    end
+    Commit.tag_in_internal_repository 'active/foo', sha1, tag
+    gitint = "git --git-dir #{Rails.configuration.git_internal_dir.shellescape}"
+    assert_match(/^commit /, IO.read("|#{gitint} show #{tag.shellescape}"))
+    assert $?.success?
+  end
+
+  test 'tag_in_internal_repository, new unreferenced sha1 in local repo' do
+    tag = "tag#{rand(10**10)}"
+    sha1 = nil
+    with_foo_repository do
+      must_pipe("echo -n #{tag.shellescape} >bar")
+      must_pipe("git add bar")
+      must_pipe("git -c user.email=x@x -c user.name=X commit -m -")
+      sha1 = must_pipe("git log -n1 --format=%H").strip
+      must_pipe("git reset --hard HEAD^")
+    end
+    Commit.tag_in_internal_repository 'active/foo', sha1, tag
+    gitint = "git --git-dir #{Rails.configuration.git_internal_dir.shellescape}"
+    assert_match(/^commit /, IO.read("|#{gitint} show #{tag.shellescape}"))
+    assert $?.success?
+  end
+
   # In active/shabranchnames, "7387838c69a21827834586cc42b467ff6c63293b" is
   # both a commit hash, and the name of a branch that begins from that same
   # commit.
index e8eff0adeae7382a734c88479aa688393a42fd6a..09373fdc05588ea593a0ff33906484d47be082ef 100644 (file)
@@ -455,6 +455,17 @@ class ContainerTest < ActiveSupport::TestCase
     check_no_change_from_cancelled c
   end
 
+  test "Container locked cancel with log" do
+    c, _ = minimal_new
+    set_user_from_auth :dispatch1
+    assert c.lock, show_errors(c)
+    assert c.update_attributes(
+             state: Container::Cancelled,
+             log: collections(:real_log_collection).portable_data_hash,
+           ), show_errors(c)
+    check_no_change_from_cancelled c
+  end
+
   test "Container running cancel" do
     c, _ = minimal_new
     set_user_from_auth :dispatch1
index 30770fc0152a58125495420ebed2d8836768cfd3..418f748fe1a25fe65232abd904312bf44267992d 100644 (file)
@@ -159,8 +159,8 @@ class InodeCache(object):
             if obj.in_use():
                 _logger.debug("InodeCache cannot clear inode %i, in use", obj.inode)
                 return
+            obj.kernel_invalidate()
             if obj.has_ref(True):
-                obj.kernel_invalidate()
                 _logger.debug("InodeCache sent kernel invalidate inode %i", obj.inode)
                 return
             obj.clear()
@@ -266,17 +266,22 @@ class Inodes(object):
             del self._entries[entry.inode]
             with llfuse.lock_released:
                 entry.finalize()
-            self.invalidate_inode(entry.inode)
             entry.inode = None
         else:
             entry.dead = True
             _logger.debug("del_entry on inode %i with refcount %i", entry.inode, entry.ref_count)
 
-    def invalidate_inode(self, inode):
-        llfuse.invalidate_inode(inode)
+    def invalidate_inode(self, entry):
+        if entry.has_ref(False):
+            # Only necessary if the kernel has previously done a lookup on this
+            # inode and hasn't yet forgotten about it.
+            llfuse.invalidate_inode(entry.inode)
 
-    def invalidate_entry(self, inode, name):
-        llfuse.invalidate_entry(inode, name.encode(self.encoding))
+    def invalidate_entry(self, entry, name):
+        if entry.has_ref(False):
+            # Only necessary if the kernel has previously done a lookup on this
+            # inode and hasn't yet forgotten about it.
+            llfuse.invalidate_entry(entry.inode, name.encode(self.encoding))
 
     def clear(self):
         self.inode_cache.clear()
@@ -432,8 +437,8 @@ class Operations(llfuse.Operations):
         entry = llfuse.EntryAttributes()
         entry.st_ino = inode
         entry.generation = 0
-        entry.entry_timeout = 60 if e.allow_dirent_cache else 0
-        entry.attr_timeout = 60 if e.allow_attr_cache else 0
+        entry.entry_timeout = 0
+        entry.attr_timeout = e.time_to_next_poll() if e.allow_attr_cache else 0
 
         entry.st_mode = stat.S_IRUSR | stat.S_IRGRP | stat.S_IROTH
         if isinstance(e, Directory):
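
In short, the Inodes invalidation API now takes entry objects instead of raw inode numbers (a sketch; parent, entry, and name are illustrative), which lets it skip the llfuse upcall when the kernel holds no lookup reference:

    # before: callers pre-encoded names and always upcalled
    #   inodes.invalidate_entry(parent.inode, name.encode(inodes.encoding))
    # after: pass the entry; the upcall only happens when it can matter
    inodes.invalidate_entry(parent, name)  # no-op unless parent.has_ref(False)
    inodes.invalidate_inode(entry)         # no-op unless entry.has_ref(False)
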
index a51dd909b690df3cb39865d021b8f4daea4b471b..8b680f0663d25cf423e68251f1a82b8ed7384bc2 100644 (file)
@@ -70,8 +70,9 @@ class FreshBase(object):
         self.dead = False
         self.cache_size = 0
         self.cache_uuid = None
+
+        # Can the kernel cache attributes?
         self.allow_attr_cache = True
-        self.allow_dirent_cache = True
 
     def invalidate(self):
         """Indicate that object contents should be refreshed from source."""
@@ -142,3 +143,13 @@ class FreshBase(object):
 
     def child_event(self, ev):
         pass
+
+    def time_to_next_poll(self):
+        if self._poll:
+            t = (self._last_update + self._poll_time) - self._atime
+            if t < 0:
+                return 0
+            else:
+                return t
+        else:
+            return self._poll_time
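
A worked example of the new attribute-cache timeout, with illustrative numbers: a polled entry lets the kernel cache attributes only until the next scheduled poll.

    # _poll=True, _poll_time=15, last update at t=100, last access at t=110:
    #   time_to_next_poll() == (100 + 15) - 110 == 5   -> attr_timeout = 5
    # If the poll is already overdue the result clamps to 0 (re-ask at once);
    # non-polled entries simply report the full _poll_time.
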
index 0178fe5544b07ddb30b9fa9c8e08d734a12cde0c..7bd00d5862434891767ba0be14823a902e38a48f 100644 (file)
@@ -150,12 +150,12 @@ class Directory(FreshBase):
          # delete any other directory entries that were not found in 'items'
         for i in oldentries:
             _logger.debug("Forgetting about entry '%s' on inode %i", i, self.inode)
-            self.inodes.invalidate_entry(self.inode, i.encode(self.inodes.encoding))
+            self.inodes.invalidate_entry(self, i)
             self.inodes.del_entry(oldentries[i])
             changed = True
 
         if changed:
-            self.inodes.invalidate_inode(self.inode)
+            self.inodes.invalidate_inode(self)
             self._mtime = time.time()
 
         self.fresh()
@@ -182,16 +182,21 @@ class Directory(FreshBase):
         self._entries = {}
         for n in oldentries:
             oldentries[n].clear()
-            self.inodes.invalidate_entry(self.inode, n.encode(self.inodes.encoding))
             self.inodes.del_entry(oldentries[n])
-        self.inodes.invalidate_inode(self.inode)
         self.invalidate()
 
     def kernel_invalidate(self):
-        for n, e in self._entries.iteritems():
-            self.inodes.invalidate_entry(self.inode, n.encode(self.inodes.encoding))
-            e.kernel_invalidate()
-        self.inodes.invalidate_inode(self.inode)
+        # Invalidating the dentry on the parent implies invalidating all paths
+        # below it as well.
+        parent = self.inodes[self.parent_inode]
+
+        # Find self in the parent in order to invalidate this path.
+        # Calling the public items() method might trigger a refresh,
+        # which we definitely don't want, so read the internal dict directly.
+        for k,v in parent._entries.items():
+            if v is self:
+                self.inodes.invalidate_entry(parent, k)
+                break
 
     def mtime(self):
         return self._mtime
@@ -266,13 +271,13 @@ class CollectionDirectoryBase(Directory):
                 elif event == arvados.collection.DEL:
                     ent = self._entries[name]
                     del self._entries[name]
-                    self.inodes.invalidate_entry(self.inode, name.encode(self.inodes.encoding))
+                    self.inodes.invalidate_entry(self, name)
                     self.inodes.del_entry(ent)
                 elif event == arvados.collection.MOD:
                     if hasattr(item, "fuse_entry") and item.fuse_entry is not None:
-                        self.inodes.invalidate_inode(item.fuse_entry.inode)
+                        self.inodes.invalidate_inode(item.fuse_entry)
                     elif name in self._entries:
-                        self.inodes.invalidate_inode(self._entries[name].inode)
+                        self.inodes.invalidate_inode(self._entries[name])
 
     def populate(self, mtime):
         self._mtime = mtime
@@ -547,7 +552,7 @@ class TmpCollectionDirectory(CollectionDirectoryBase):
         if self.collection_record_file:
             with llfuse.lock:
                 self.collection_record_file.invalidate()
-            self.inodes.invalidate_inode(self.collection_record_file.inode)
+            self.inodes.invalidate_inode(self.collection_record_file)
             _logger.debug("%s invalidated collection record", self)
 
     def collection_record(self):
@@ -639,6 +644,7 @@ will appear if it exists.
             return False
 
         try:
+            e = None
             e = self.inodes.add_entry(CollectionDirectory(
                     self.inode, self.inodes, self.api, self.num_retries, k))
 
@@ -649,12 +655,13 @@ will appear if it exists.
                     self.inodes.del_entry(e)
                 return True
             else:
-                self.inodes.invalidate_entry(self.inode, k)
+                self.inodes.invalidate_entry(self, k)
                 self.inodes.del_entry(e)
                 return False
         except Exception as ex:
-            _logger.debug('arv-mount exception keep %s', ex)
-            self.inodes.del_entry(e)
+            _logger.exception("arv-mount lookup '%s':", k)
+            if e is not None:
+                self.inodes.del_entry(e)
             return False
 
     def __getitem__(self, item):
@@ -963,7 +970,7 @@ class ProjectDirectory(Directory):
         # Actually move the entry from the source directory to this directory.
         del src._entries[name_old]
         self._entries[name_new] = ent
-        self.inodes.invalidate_entry(src.inode, name_old.encode(self.inodes.encoding))
+        self.inodes.invalidate_entry(src, name_old)
 
     @use_counter
     def child_event(self, ev):
@@ -1000,7 +1007,7 @@ class ProjectDirectory(Directory):
             if old_name in self._entries:
                 ent = self._entries[old_name]
                 del self._entries[old_name]
-                self.inodes.invalidate_entry(self.inode, old_name.encode(self.inodes.encoding))
+                self.inodes.invalidate_entry(self, old_name)
 
             if new_name:
                 if ent is not None:
index 8189a19742b3311f1c720a241e55fa2a30846395..585536176007bdfcc889a47647f85114e6a34fb7 100644 (file)
@@ -122,12 +122,11 @@ class FuncToJSONFile(StringFile):
         super(FuncToJSONFile, self).__init__(parent_inode, "", 0)
         self.func = func
 
-        # invalidate_inode() and invalidate_entry() are asynchronous
-        # with no callback to wait for. In order to guarantee
-        # userspace programs don't get stale data that was generated
-        # before the last invalidate(), we must disallow dirent
+        # invalidate_inode() is asynchronous with no callback to wait for. In
+        # order to guarantee userspace programs don't get stale data that was
+        # generated before the last invalidate(), we must disallow inode
         # caching entirely.
-        self.allow_dirent_cache = False
+        self.allow_attr_cache = False
 
     def size(self):
         self._update()
index 89925ba6f8fd4174e6acaf818e699d335cff91f9..6d535eaed4dd73b31a91bd6e60774f6eac9ee4d7 100755 (executable)
@@ -51,6 +51,7 @@ VAR_DATA="$ARVBOX_DATA/var"
 PASSENGER="$ARVBOX_DATA/passenger"
 GEMS="$ARVBOX_DATA/gems"
 PIPCACHE="$ARVBOX_DATA/pip"
+NPMCACHE="$ARVBOX_DATA/npm"
 GOSTUFF="$ARVBOX_DATA/gopath"
 
 getip() {
@@ -183,7 +184,7 @@ run() {
         updateconf
         wait_for_arvbox
     else
-        mkdir -p "$PG_DATA" "$VAR_DATA" "$PASSENGER" "$GEMS" "$PIPCACHE" "$GOSTUFF"
+        mkdir -p "$PG_DATA" "$VAR_DATA" "$PASSENGER" "$GEMS" "$PIPCACHE" "$NPMCACHE" "$GOSTUFF"
 
 
         if ! test -d "$ARVADOS_ROOT" ; then
@@ -209,6 +210,7 @@ run() {
                        "--volume=$PASSENGER:/var/lib/passenger:rw" \
                        "--volume=$GEMS:/var/lib/gems:rw" \
                        "--volume=$PIPCACHE:/var/lib/pip:rw" \
+                       "--volume=$NPMCACHE:/var/lib/npm:rw" \
                        "--volume=$GOSTUFF:/var/lib/gopath:rw" \
                        arvados/arvbox-dev$TAG \
                        /usr/local/bin/runsvinit -svdir=/etc/test-service
@@ -249,6 +251,7 @@ run() {
                    "--volume=$PASSENGER:/var/lib/passenger:rw" \
                    "--volume=$GEMS:/var/lib/gems:rw" \
                    "--volume=$PIPCACHE:/var/lib/pip:rw" \
+                   "--volume=$NPMCACHE:/var/lib/npm:rw" \
                    "--volume=$GOSTUFF:/var/lib/gopath:rw" \
                    $PUBLIC \
                    arvados/arvbox-dev$TAG
index bf42951300e9e375187878ebd17eac97da2dad2d..39d5dafd0bca95bb20108f3fdd405de4337298e1 100644 (file)
@@ -65,10 +65,13 @@ RUN cd /root && \
     GOPATH=$PWD go get github.com/curoverse/runsvinit && \
     install bin/runsvinit /usr/local/bin
 
-ENV PJSVERSION=1.9.7
+ENV PJSVERSION=1.9.8
+# bitbucket is the origin, but downloads fail sometimes, so use our own mirror instead.
+#ENV PJSURL=https://bitbucket.org/ariya/phantomjs/downloads/phantomjs-${PJSVERSION}-linux-x86_64.tar.bz2
+ENV PJSURL=http://cache.arvados.org/phantomjs-${PJSVERSION}-linux-x86_64.tar.bz2
 
 RUN set -e && \
- curl -L -f http://cache.arvados.org/phantomjs-${PJSVERSION}-linux-x86_64.tar.bz2 | tar -C /usr/local -xjf - && \
+ curl -L -f ${PJSURL} | tar -C /usr/local -xjf - && \
  ln -s ../phantomjs-${PJSVERSION}-linux-x86_64/bin/phantomjs /usr/local/bin
 
 RUN pip install -U setuptools
@@ -79,6 +82,8 @@ ENV NODEVERSION v6.11.2
 RUN curl -L -f https://nodejs.org/dist/${NODEVERSION}/node-${NODEVERSION}-linux-x64.tar.xz | tar -C /usr/local -xJf - && \
     ln -s ../node-${NODEVERSION}-linux-x64/bin/node ../node-${NODEVERSION}-linux-x64/bin/npm /usr/local/bin
 
+RUN echo en_US.UTF-8 UTF-8 > /etc/locale.gen && locale-gen
+
 ARG arvados_version
 RUN echo arvados_version is git commit $arvados_version
 
index 8792d316f00211ff0be21510c60023cf7136c610..62225df6ceae523493a27450466345c28d4bc7a5 100644 (file)
@@ -6,6 +6,8 @@
 export PATH=${PATH}:/usr/local/go/bin:/var/lib/gems/bin
 export GEM_HOME=/var/lib/gems
 export GEM_PATH=/var/lib/gems
+export npm_config_cache=/var/lib/npm
+export npm_config_cache_min=Infinity
 
 if test -s /var/run/localip_override ; then
     localip=$(cat /var/run/localip_override)
@@ -65,7 +67,7 @@ pip_install() {
     done
     popd
 
-    if ! pip install --no-index --find-links /var/lib/pip --system $1 ; then
-        pip install --system $1
+    if ! pip install --no-index --find-links /var/lib/pip $1 ; then
+        pip install $1
     fi
 }
index f717ea99b5c7f49bb41e34b616a30da1fadf280d..3296a3cd178779cb956d87250748a71e9ae8d8ad 100755 (executable)
@@ -10,7 +10,8 @@ if ! grep "^arvbox:" /etc/passwd >/dev/null 2>/dev/null ; then
     HOSTGID=$(ls -nd /usr/src/arvados | sed 's/ */ /' | cut -d' ' -f5)
 
     mkdir -p /var/lib/arvados/git /var/lib/gems \
-          /var/lib/passenger /var/lib/gopath /var/lib/pip
+          /var/lib/passenger /var/lib/gopath \
+          /var/lib/pip /var/lib/npm
 
     groupadd --gid $HOSTGID --non-unique arvbox
     groupadd --gid $HOSTGID --non-unique git
@@ -25,7 +26,7 @@ if ! grep "^arvbox:" /etc/passwd >/dev/null 2>/dev/null ; then
     chown arvbox:arvbox -R /usr/local /var/lib/arvados /var/lib/gems \
           /var/lib/passenger /var/lib/postgresql \
           /var/lib/nginx /var/log/nginx /etc/ssl/private \
-          /var/lib/gopath /var/lib/pip
+          /var/lib/gopath /var/lib/pip /var/lib/npm
 
     mkdir -p /var/lib/gems/ruby
     chown arvbox:arvbox -R /var/lib/gems/ruby
index 9137aa03af5ed5223099edf59c0339e0d5b95446..a0771aa6a04a9ba007b49c85e298f8f44c9cc7d6 100755 (executable)
@@ -9,7 +9,7 @@ set -eux -o pipefail
 PGVERSION=9.6
 
 if ! test -d /var/lib/postgresql/$PGVERSION/main ; then
-    /usr/lib/postgresql/$PGVERSION/bin/initdb -D /var/lib/postgresql/$PGVERSION/main
+    /usr/lib/postgresql/$PGVERSION/bin/initdb --locale=en_US.UTF-8 -D /var/lib/postgresql/$PGVERSION/main
     sh -c "while ! (psql postgres -c'\du' | grep '^ arvbox ') >/dev/null ; do createuser -s arvbox ; sleep 1 ; done" &
 fi
 mkdir -p /var/run/postgresql/$PGVERSION-main.pg_stat_tmp
index 29bd040d6573cc91126ea7604285ec79322c2e12..0e937908e8116f5984c367e4cee25fb718b3f833 100755 (executable)
@@ -18,6 +18,18 @@ cd /usr/src/arvados/sdk/cli
 run_bundler --binstubs=$PWD/binstubs
 ln -sf /usr/src/arvados/sdk/cli/binstubs/arv /usr/local/bin/arv
 
+# Need to install the upstream version of pip because the python-pip package
+# shipped with Debian 9 is patched to change behavior in a way that breaks our
+# use case.
+# See https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=876145
+# When a non-root user attempts to install system packages, the patched pip
+# makes the --ignore-installed flag the default (and there is no way to turn
+# it off). This has the effect of making it very hard to share dependencies
+# among multiple packages, because pip will blindly install the latest version
+# of each dependency requested by each package, even if a compatible version
+# is already installed.
+pip_install pip
+
 pip_install wheel
 
 cd /usr/src/arvados/sdk/python
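
To illustrate the bug described in the comment above (a hypothetical session; the package name is made up):

    # Debian-patched pip, as a non-root user, implicitly behaves like:
    #   pip install --ignore-installed somepkg   # reinstalls every dependency
    # After pip_install pip swaps in upstream pip:
    #   pip install somepkg                      # reuses compatible installed deps
    # (which is also why common.sh above drops the Debian-only --system flag)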