11789: Merge branch 'master' into 11789-arvput-exclude-flag
authorLucas Di Pentima <lucas@curoverse.com>
Tue, 20 Jun 2017 20:41:36 +0000 (17:41 -0300)
committerLucas Di Pentima <lucas@curoverse.com>
Tue, 20 Jun 2017 20:41:36 +0000 (17:41 -0300)
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas@curoverse.com>

48 files changed:
.licenseignore [new file with mode: 0644]
build/check-copyright-notices [new file with mode: 0755]
build/libcloud-pin.sh [moved from build/libcloud-pin with 100% similarity]
build/package-build-dockerfiles/Makefile
build/package-build-dockerfiles/centos7/Dockerfile
build/package-build-dockerfiles/debian8/Dockerfile
build/package-build-dockerfiles/ubuntu1204/Dockerfile
build/package-build-dockerfiles/ubuntu1404/Dockerfile
build/package-build-dockerfiles/ubuntu1604/Dockerfile
build/run-build-packages-python-and-ruby.sh
build/run-build-packages.sh
build/run-tests.sh
doc/start/index.html.textile.liquid.bkup [deleted file]
sdk/cwl/arvados_cwl/__init__.py
sdk/cwl/arvados_cwl/arvjob.py
sdk/cwl/arvados_cwl/done.py
sdk/cwl/test_with_arvbox.sh
sdk/cwl/tests/test_job.py
sdk/go/arvados/collection_fs.go [new file with mode: 0644]
sdk/go/arvados/collection_fs_test.go [new file with mode: 0644]
sdk/go/arvadostest/fixtures.go
sdk/go/keepclient/collectionreader.go
sdk/python/arvados/commands/put.py
sdk/python/tests/test_arv_put.py
services/api/app/controllers/arvados/v1/repositories_controller.rb
services/api/app/controllers/arvados/v1/users_controller.rb
services/api/app/models/node.rb
services/api/app/models/user.rb
services/api/lib/can_be_an_owner.rb
services/api/test/functional/arvados/v1/users_controller_test.rb
services/api/test/integration/users_test.rb
services/api/test/unit/node_test.rb
services/api/test/unit/user_test.rb
services/crunch-run/crunchrun.go
services/crunch-run/crunchrun_test.go
services/keep-web/cache.go
services/keep-web/cache_test.go
services/keep-web/handler.go
services/keep-web/handler_test.go
services/keep-web/server_test.go
services/nodemanager/arvnodeman/computenode/driver/ec2.py
services/nodemanager/arvnodeman/daemon.py
services/nodemanager/arvnodeman/test/fake_driver.py
services/nodemanager/setup.py
services/nodemanager/tests/test_computenode_driver_ec2.py
services/nodemanager/tests/testutil.py
tools/arvbox/bin/arvbox
tools/arvbox/lib/arvbox/docker/Dockerfile.base

diff --git a/.licenseignore b/.licenseignore
new file mode 100644 (file)
index 0000000..ee5e5fd
--- /dev/null
@@ -0,0 +1,46 @@
+*agpl-3.0.txt
+apps/workbench/app/assets/javascripts/list.js
+build/package-test-dockerfiles/centos7/localrepo.repo
+build/package-test-dockerfiles/ubuntu1604/etc-apt-preferences.d-arvados
+*by-sa-3.0.txt
+*COPYING
+doc/fonts/*
+*/docker_image
+docker/jobs/apt.arvados.org.list
+*.gif
+.gitignore
+*/.gitignore
+*/.gitkeep
+*/.gitstub
+*.gz
+*.gz.report
+*.ico
+*.jpg
+*.json
+*LICENSE*.txt
+*.lock
+*.log
+*.map
+*.png
+*/proc_stat
+*/README
+*/robots.txt
+*/runit-docker/*
+*/script/rails
+sdk/cwl/tests/input/blorp.txt
+sdk/cwl/tests/tool/blub.txt
+sdk/go/manifest/testdata/*_manifest
+sdk/java/.classpath
+sdk/java/pom.xml
+sdk/java/.project
+sdk/java/.settings/org.eclipse.jdt.core.prefs
+sdk/java/src/main/resources/log4j.properties
+sdk/pam/examples/shellinabox
+sdk/pam/pam-configs/arvados
+sdk/python/tests/data/*
+services/api/config/unbound.template
+services/arv-web/sample-cgi-app/public/.htaccess
+services/arv-web/sample-cgi-app/public/index.cgi
+services/keepproxy/pkg-extras/etc/default/keepproxy
+*.tar
+tools/crunchstat-summary/tests/crunchstat_error_messages.txt
diff --git a/build/check-copyright-notices b/build/check-copyright-notices
new file mode 100755 (executable)
index 0000000..aef14fa
--- /dev/null
@@ -0,0 +1,201 @@
+#!/bin/bash
+# Copyright (C) The Arvados Authors. All rights reserved.
+#
+# SPDX-License-Identifier: AGPL-3.0
+
+set -e
+
+fix=false
+while [[ "${@}" != "" ]]
+do
+    arg=${1}
+    shift
+    case ${arg} in
+        --help)
+            cat <<EOF
+Usage: $0 [--fix] [-- git-ls-args...]
+
+Options:
+
+--fix   Insert missing copyright notices where possible.
+
+Git arguments:
+
+Arguments after "--" are passed to \`git ls-files\`; this can be used to
+restrict the set of files to check.
+
+EOF
+            exit 2
+            ;;
+        --fix)
+            fix=true
+            ;;
+        --)
+            break
+            ;;
+        *)
+            echo >&2 "Unrecognized argument '${arg}'. Try $0 --help"
+            exit 2
+            ;;
+    esac
+done
+
+fixer() {
+    want="${want}" perl -pi~ -e 'if ($. == 1) { s{^(\#\!.*\n)?}{${1}$ENV{want}\n\n} }' "${1}"
+}
+
+IFS=$'\n' read -a ignores -r -d $'\000' <.licenseignore || true
+result=0
+git ls-files -z ${@} | \
+    while read -rd $'\000' fnm
+    do
+        grepAfter=2
+        grepBefore=0
+        cs=
+        cc=
+        ce=
+        fixer=
+        if [[ ! -f ${fnm} ]] || [[ -L ${fnm} ]] || [[ ! -s ${fnm} ]]
+        then
+            continue
+        fi
+
+        ignore=
+        for pattern in "${ignores[@]}"
+        do
+            if [[ ${fnm} == ${pattern} ]]
+            then
+                ignore=1
+            fi
+        done
+        if [[ ${ignore} = 1 ]]; then continue; fi
+
+        case ${fnm} in
+            Makefile | */Makefile \
+                | *.dockerfile | */Dockerfile.* | */Dockerfile | *.dockerignore \
+                | */MANIFEST.in | */fuse.conf | */gitolite.rc \
+                | *.pl | *.pm | *.PL \
+                | *.rb | *.rb.example | *.rake | *.ru \
+                | *.gemspec | */Gemfile | */Rakefile \
+                | services/login-sync/bin/* \
+                | sdk/cli/bin/* \
+                | *.py \
+                | sdk/python/bin/arv-* \
+                | sdk/cwl/bin/* \
+                | services/nodemanager/bin/* \
+                | services/fuse/bin/* \
+                | tools/crunchstat-summary/bin/* \
+                | crunch_scripts/* \
+                | *.yaml | *.yml | *.yml.example | *.cwl \
+                | *.sh | *.service \
+                | */run | */run-service | */restart-dns-server \
+                | */nodemanager/doc/*.cfg \
+                | */nginx.conf \
+                | build/build.list)
+                fixer=fixer
+                cc="#"
+                ;;
+            *.md)
+                fixer=fixer
+                cc="[//]: #"
+                ;;
+            *.rst)
+                fixer=fixer
+                cc=".."
+                ;;
+            *.erb)
+                fixer=fixer
+                cs="<%#"
+                cc=""
+                ce=" %>"
+                ;;
+            *.liquid)
+                fixer=fixer
+                cs=$'{% comment %}\n'
+                cc=
+                ce=$'\n%{% endcomment %}'
+                grepAfter=3
+                grepBefore=1
+                ;;
+            *.textile)
+                fixer=fixer
+                cs="###."
+                cc="...."
+                ce=
+                ;;
+            *.css)
+                fixer=fixer
+                cs="/*"
+                cc=""
+                ce=" */"
+                ;;
+            *.go | *.scss | *.java | *.js | *.coffee)
+                fixer=fixer
+                cc="//"
+                ;;
+            *.sql)
+                fixer=fixer
+                cc="--"
+                ;;
+            *.html | *.svg)
+                fixer=fixer
+                cs="<!-- "
+                cc=
+                ce=" -->"
+                ;;
+            *)
+                cc="#"
+                hashbang=$(head -n1 ${fnm})
+                if [[ ${hashbang} = "#!/bin/sh" ]] ||  [[ ${hashbang} = "#!/bin/bash" ]]
+                then
+                    fixer=fixer
+                fi
+                ;;
+        esac
+        wantGPL="${cs:-${cc}} Copyright (C) The Arvados Authors. All rights reserved.
+${cc}
+${cc}${cc:+ }SPDX-License-Identifier: AGPL-3.0${ce}"
+        wantApache="${cs:-${cc}} Copyright (C) The Arvados Authors. All rights reserved.
+${cc}
+${cc}${cc:+ }SPDX-License-Identifier: Apache-2.0${ce}"
+        wantBYSA="${cs:-${cc}} Copyright (C) The Arvados Authors. All rights reserved.
+${cc}
+${cc}${cc:+ }SPDX-License-Identifier: CC-BY-SA-3.0${ce}"
+        found=$(head "$fnm" | egrep -A${grepAfter} -B${grepBefore} 'Copyright.*Arvados' || true)
+        case ${fnm} in
+            Makefile | build/* | lib/* | tools/* | apps/* | services/*)
+                want=${wantGPL}
+                ;;
+            crunch_scripts/* | backports/* | docker/* | sdk/*)
+                want=${wantApache}
+                ;;
+            README.md | doc/*)
+                want=${wantBYSA}
+                ;;
+            *)
+                want=
+                ;;
+        esac
+        case "$found" in
+            "$wantGPL")
+                ;;
+            "$wantApache")
+                ;;
+            "$wantBYSA")
+                ;;
+            "")
+                if [[ -z ${found} ]] && [[ -n ${want} ]] && [[ $fix = true ]] && [[ $fixer != "" ]]
+                then
+                    ${fixer} ${fnm}
+                else
+                    echo "missing copyright notice: $fnm"
+                    result=1
+                fi
+                ;;
+            *)
+                echo "nonstandard copyright notice: $fnm '${found}'"
+                result=1
+                ;;
+        esac
+    done
+exit $result
similarity index 100%
rename from build/libcloud-pin
rename to build/libcloud-pin.sh
index 75f75ca73806469af36fef0387d822a162e4e7de..34e909580f698d43a6c1f041660ac30bbebe16ad 100644 (file)
@@ -20,7 +20,7 @@ ubuntu1604/generated: common-generated-all
        test -d ubuntu1604/generated || mkdir ubuntu1604/generated
        cp -rlt ubuntu1604/generated common-generated/*
 
-GOTARBALL=go1.7.5.linux-amd64.tar.gz
+GOTARBALL=go1.8.3.linux-amd64.tar.gz
 
 common-generated-all: common-generated/$(GOTARBALL)
 
index 08bd473695ce79a972e0a5d7f0ddde3401520090..162cf7993600ba1956f2f1e74b40107cc7bc485a 100644 (file)
@@ -5,7 +5,7 @@ MAINTAINER Brett Smith <brett@curoverse.com>
 RUN yum -q -y install make automake gcc gcc-c++ libyaml-devel patch readline-devel zlib-devel libffi-devel openssl-devel bzip2 libtool bison sqlite-devel rpm-build git perl-ExtUtils-MakeMaker libattr-devel nss-devel libcurl-devel which tar unzip scl-utils centos-release-scl postgresql-devel python-devel python-setuptools fuse-devel xz-libs git
 
 # Install golang binary
-ADD generated/go1.7.5.linux-amd64.tar.gz /usr/local/
+ADD generated/go1.8.3.linux-amd64.tar.gz /usr/local/
 RUN ln -s /usr/local/go/bin/go /usr/local/bin/
 
 # Install RVM
index d4e77c9aa03c5f4cef936c27295edfdfc67806fe..0e4f696a4fcd7491d3ceae5699a9c44623d55497 100644 (file)
@@ -13,7 +13,7 @@ RUN gpg --keyserver pool.sks-keyservers.net --recv-keys D39DC0E3 && \
     /usr/local/rvm/bin/rvm-exec default gem install cure-fpm --version 1.6.0b
 
 # Install golang binary
-ADD generated/go1.7.5.linux-amd64.tar.gz /usr/local/
+ADD generated/go1.8.3.linux-amd64.tar.gz /usr/local/
 RUN ln -s /usr/local/go/bin/go /usr/local/bin/
 
 ENV WORKSPACE /arvados
index daeabc9b0372ca61792b686f4f60e531d0341dbd..ef0414a778eb46617ce508ae014c790814621a78 100644 (file)
@@ -13,7 +13,7 @@ RUN gpg --keyserver pool.sks-keyservers.net --recv-keys D39DC0E3 && \
     /usr/local/rvm/bin/rvm-exec default gem install cure-fpm --version 1.6.0b
 
 # Install golang binary
-ADD generated/go1.7.5.linux-amd64.tar.gz /usr/local/
+ADD generated/go1.8.3.linux-amd64.tar.gz /usr/local/
 RUN ln -s /usr/local/go/bin/go /usr/local/bin/
 
 ENV WORKSPACE /arvados
index aa92ad21504f68a280f3f4883effbd72655042a9..0d4be789ab439631f513ab4ff2e3d98e1c5ff69f 100644 (file)
@@ -13,7 +13,7 @@ RUN gpg --keyserver pool.sks-keyservers.net --recv-keys D39DC0E3 && \
     /usr/local/rvm/bin/rvm-exec default gem install cure-fpm --version 1.6.0b
 
 # Install golang binary
-ADD generated/go1.7.5.linux-amd64.tar.gz /usr/local/
+ADD generated/go1.8.3.linux-amd64.tar.gz /usr/local/
 RUN ln -s /usr/local/go/bin/go /usr/local/bin/
 
 ENV WORKSPACE /arvados
index 265141c6dbb6a06a7c964c7b76c04c3ad92c352a..b1ec7ce91df442d14f49af600f64cde76004cbe3 100644 (file)
@@ -13,7 +13,7 @@ RUN gpg --keyserver pool.sks-keyservers.net --recv-keys D39DC0E3 && \
     /usr/local/rvm/bin/rvm-exec default gem install cure-fpm --version 1.6.0b
 
 # Install golang binary
-ADD generated/go1.7.5.linux-amd64.tar.gz /usr/local/
+ADD generated/go1.8.3.linux-amd64.tar.gz /usr/local/
 RUN ln -s /usr/local/go/bin/go /usr/local/bin/
 
 ENV WORKSPACE /arvados
index 13aa687316a5bbcca681e3fb1e9b6f7f0c0c9022..b7c642a7ad9315cb08ba5e987f3b4146413544b8 100755 (executable)
@@ -3,7 +3,7 @@
 COLUMNS=80
 
 . `dirname "$(readlink -f "$0")"`/run-library.sh
-#. `dirname "$(readlink -f "$0")"`/libcloud-pin
+#. `dirname "$(readlink -f "$0")"`/libcloud-pin.sh
 
 read -rd "\000" helpmessage <<EOF
 $(basename $0): Build Arvados Python packages and Ruby gems
index 777cd3c844536fc43bc90e70a611c3e57ccd8feb..7763ca5454add00dbafecff621448757ad72304b 100755 (executable)
@@ -1,7 +1,7 @@
 #!/bin/bash
 
 . `dirname "$(readlink -f "$0")"`/run-library.sh
-. `dirname "$(readlink -f "$0")"`/libcloud-pin
+. `dirname "$(readlink -f "$0")"`/libcloud-pin.sh
 
 read -rd "\000" helpmessage <<EOF
 $(basename $0): Build Arvados packages
index 352d05b945ea168fb0700614c9327bfd1e3fa033..d865f77f78fdf25510afcafd17e0ecd0b0fecb9a 100755 (executable)
@@ -1,6 +1,6 @@
 #!/bin/bash
 
-. `dirname "$(readlink -f "$0")"`/libcloud-pin
+. `dirname "$(readlink -f "$0")"`/libcloud-pin.sh
 
 COLUMNS=80
 . `dirname "$(readlink -f "$0")"`/run-library.sh
@@ -169,8 +169,8 @@ sanity_checks() {
     echo -n 'go: '
     go version \
         || fatal "No go binary. See http://golang.org/doc/install"
-    [[ $(go version) =~ go1.([0-9]+) ]] && [[ ${BASH_REMATCH[1]} -ge 7 ]] \
-        || fatal "Go >= 1.7 required. See http://golang.org/doc/install"
+    [[ $(go version) =~ go1.([0-9]+) ]] && [[ ${BASH_REMATCH[1]} -ge 8 ]] \
+        || fatal "Go >= 1.8 required. See http://golang.org/doc/install"
     echo -n 'gcc: '
     gcc --version | egrep ^gcc \
         || fatal "No gcc. Try: apt-get install build-essential"
diff --git a/doc/start/index.html.textile.liquid.bkup b/doc/start/index.html.textile.liquid.bkup
deleted file mode 100644 (file)
index 339a84a..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
----
-layout: default
-navsection: start 
-title: Welcome to Arvados!
-...
-
-This guide provides an introduction to using Arvados to solve big data bioinformatics problems.
-
-
-h2. Typographic conventions
-
-This manual uses the following typographic conventions:
-
-<notextile>
-<ul>
-<li>Code blocks which are set aside from the text indicate user input to the system.  Commands that should be entered into a Unix shell are indicated by the directory where you should  enter the command ('~' indicates your home directory) followed by '$', followed by the highlighted <span class="userinput">command to enter</span> (do not enter the '$'), and possibly followed by example command output in black.  For example, the following block indicates that you should type <code>ls foo.*</code> while in your home directory and the expected output will be "foo.input" and "foo.output".
-</notextile>
-
-<div class="custom-container key-features">
-<a class="prev" href="#">‹</a>
-
-<div class="carousel">
-    <ul>
-        <li><img class="hascaption" src="{{ site.baseurl }}/images/keyfeatures/dashboard2.png" style="width:909px; height:503px;" title="[START] After logging in, you see Workbench's dashboard."></li>
-        <li><img class="hascaption" src="{{ site.baseurl }}/images/keyfeatures/running2.png" style="width:909px; height:503px;" title="Pipelines describe a set of computational tasks (jobs)."></li>
-        <li><img class="hascaption" src="{{ site.baseurl }}/images/keyfeatures/log.png" style="width:909px; height:503px;" title="The output of all jobs is logged and stored automatically."></li>
-        <li><img class="hascaption" src="{{ site.baseurl }}/images/keyfeatures/graph.png" style="width:909px; height:503px;" title="Pipelines can be also viewed in auto-generated graph form."></li>
-        <li><img class="hascaption" src="{{ site.baseurl }}/images/keyfeatures/rerun.png" style="width:909px; height:503px;" title="Pipelines can easily be re-run..."></li>
-        <li><img class="hascaption" src="{{ site.baseurl }}/images/keyfeatures/chooseinputs.png" style="width:909px; height:503px;" title="...by changing parameters or picking new datasets..."></li>
-        <li><img class="hascaption" src="{{ site.baseurl }}/images/keyfeatures/webupload.png" style="width:909px; height:503px;" title="...which can be uploaded right in Workbench."></li>
-        <li><img class="hascaption" src="{{ site.baseurl }}/images/keyfeatures/collectionpage.png" style="width:909px; height:503px;" title="Collections allow sharing datasets and job outputs easily."></li>
-        <li><img class="hascaption" src="{{ site.baseurl }}/images/keyfeatures/provenance.png" style="width:909px; height:503px;" title="Data provenance is tracked automatically. [END]"></li>
-    </ul>
-</div>
-<a class="next" href="#">›</a>
-<div class="clear"></div>
-</div>
-
-<script type="text/javascript">
-(function() {
-    $(".key-features .carousel").jCarouselLite({
-        btnNext: ".key-features .next",
-        btnPrev: ".key-features .prev",
-        visible: 1,
-    });
-});
-('.hascaption').each(function() {
-    $(this).after( "<div style='background: rgba(0,0,0,0.6); color: white; padding: 1.4em;'>" + $(this).attr('title') + "</div>" ); 
-});
-</script>
index 43082dfb854b4bf073ec5024ca306517d809d6e6..f7da563cd4aca1bd650bb19d35dcd28cdab08757 100644 (file)
@@ -449,8 +449,7 @@ class ArvCwlRunner(object):
                                       name=kwargs.get("name"),
                                       on_error=kwargs.get("on_error"),
                                       submit_runner_image=kwargs.get("submit_runner_image"))
-
-        if not kwargs.get("submit") and "cwl_runner_job" not in kwargs and self.work_api == "jobs":
+        elif "cwl_runner_job" not in kwargs and self.work_api == "jobs":
             # Create pipeline for local run
             self.pipeline = self.api.pipeline_instances().create(
                 body={
index 0bf91f24a0a9804ebc93683ca9b004cafd530e32..ef9aba51da454926e8b96fdf8dcaf0ef175a711a 100644 (file)
@@ -145,6 +145,15 @@ class ArvadosJob(object):
 
             if response["state"] == "Complete":
                 logger.info("%s reused job %s", self.arvrunner.label(self), response["uuid"])
+                # Give read permission to the desired project on reused jobs
+                if response["owner_uuid"] != self.arvrunner.project_uuid:
+                    self.arvrunner.api.links().create(body={
+                        'link_class': 'permission',
+                        'name': 'can_read',
+                        'tail_uuid': self.arvrunner.project_uuid,
+                        'head_uuid': response["uuid"],
+                        }).execute(num_retries=self.arvrunner.num_retries)
+
                 with Perf(metrics, "done %s" % self.name):
                     self.done(response)
             else:
index 69b074cc73ccb02d0b4c12e096075c12f2d02871..48466f00c242b04e3bd143f8b319d0004c10eb45 100644 (file)
@@ -3,39 +3,43 @@ from cwltool.errors import WorkflowException
 from collections import deque
 
 def done(self, record, tmpdir, outdir, keepdir):
-    colname = "Output %s of %s" % (record["output"][0:7], self.name)
+    cols = [
+        ("output", "Output %s of %s" % (record["output"][0:7], self.name), record["output"]),
+        ("log", "Log of %s" % (record["uuid"]), record["log"])
+    ]
 
-    # check if collection already exists with same owner, name and content
-    collection_exists = self.arvrunner.api.collections().list(
-        filters=[["owner_uuid", "=", self.arvrunner.project_uuid],
-                 ['portable_data_hash', '=', record["output"]],
-                 ["name", "=", colname]]
-    ).execute(num_retries=self.arvrunner.num_retries)
-
-    if not collection_exists["items"]:
-        # Create a collection located in the same project as the
-        # pipeline with the contents of the output.
-        # First, get output record.
-        collections = self.arvrunner.api.collections().list(
-            limit=1,
-            filters=[['portable_data_hash', '=', record["output"]]],
-            select=["manifest_text"]
+    for coltype, colname, colpdh in cols:
+        # check if collection already exists with same owner, name and content
+        collection_exists = self.arvrunner.api.collections().list(
+            filters=[["owner_uuid", "=", self.arvrunner.project_uuid],
+                     ['portable_data_hash', '=', colpdh],
+                     ["name", "=", colname]]
         ).execute(num_retries=self.arvrunner.num_retries)
 
-        if not collections["items"]:
-            raise WorkflowException(
-                "[job %s] output '%s' cannot be found on API server" % (
-                    self.name, record["output"]))
+        if not collection_exists["items"]:
+            # Create a collection located in the same project as the
+            # pipeline with the contents of the output/log.
+            # First, get output/log record.
+            collections = self.arvrunner.api.collections().list(
+                limit=1,
+                filters=[['portable_data_hash', '=', colpdh]],
+                select=["manifest_text"]
+            ).execute(num_retries=self.arvrunner.num_retries)
+
+            if not collections["items"]:
+                raise WorkflowException(
+                    "[job %s] %s '%s' cannot be found on API server" % (
+                        self.name, coltype, colpdh))
 
-        # Create new collection in the parent project
-        # with the output contents.
-        self.arvrunner.api.collections().create(body={
-            "owner_uuid": self.arvrunner.project_uuid,
-            "name": colname,
-            "portable_data_hash": record["output"],
-            "manifest_text": collections["items"][0]["manifest_text"]
-        }, ensure_unique_name=True).execute(
-            num_retries=self.arvrunner.num_retries)
+            # Create new collection in the parent project
+            # with the output/log contents.
+            self.arvrunner.api.collections().create(body={
+                "owner_uuid": self.arvrunner.project_uuid,
+                "name": colname,
+                "portable_data_hash": colpdh,
+                "manifest_text": collections["items"][0]["manifest_text"]
+            }, ensure_unique_name=True).execute(
+                num_retries=self.arvrunner.num_retries)
 
     return done_outputs(self, record, tmpdir, outdir, keepdir)
 
index 8d076093abfe6b6fcbb52ccea421763d76aa38b6..ecab5e37beea5cac04b23cc543a87416bab9f680 100755 (executable)
@@ -9,7 +9,7 @@ fi
 reset_container=1
 leave_running=0
 config=dev
-tag=""
+tag="latest"
 
 while test -n "$1" ; do
     arg="$1"
@@ -74,22 +74,30 @@ export ARVADOS_API_HOST=localhost:8000
 export ARVADOS_API_HOST_INSECURE=1
 export ARVADOS_API_TOKEN=\$(cat /var/lib/arvados/superuser_token)
 
-arv-keepdocker --pull arvados/jobs latest
+
+if test "$tag" = "latest" ; then
+  arv-keepdocker --pull arvados/jobs $tag
+else
+  jobsimg=$(curl http://versions.arvados.org/v1/commit/$tag | python -c "import json; import sys; sys.stdout.write(json.load(sys.stdin)['Versions']['Docker']['arvados/jobs'])")
+  arv-keepdocker --pull arvados/jobs $jobsimg
+  docker tag -f arvados/jobs:$jobsimg arvados/jobs:latest
+  arv-keepdocker arvados/jobs latest
+fi
 
 cat >/tmp/cwltest/arv-cwl-jobs <<EOF2
 #!/bin/sh
-exec arvados-cwl-runner --api=jobs --compute-checksum \\\$@
+exec arvados-cwl-runner --api=jobs \\\$@
 EOF2
 chmod +x /tmp/cwltest/arv-cwl-jobs
 
 cat >/tmp/cwltest/arv-cwl-containers <<EOF2
 #!/bin/sh
-exec arvados-cwl-runner --api=containers --compute-checksum \\\$@
+exec arvados-cwl-runner --api=containers \\\$@
 EOF2
 chmod +x /tmp/cwltest/arv-cwl-containers
 
 env
-exec ./run_test.sh $@
+exec ./run_test.sh EXTRA=--compute-checksum $@
 EOF
 
 CODE=$?
index 99dd3cb669729f09d6c45d914178a5e56a57ffd6..fa043baa42a71582509a47505be401850f08187f 100644 (file)
@@ -33,7 +33,15 @@ class TestJob(unittest.TestCase):
             document_loader, avsc_names, schema_metadata, metaschema_loader = cwltool.process.get_schema("v1.0")
 
             list_images_in_arv.return_value = [["zzzzz-4zz18-zzzzzzzzzzzzzzz"]]
-            runner.api.collections().get().execute.return_vaulue = {"portable_data_hash": "99999999999999999999999999999993+99"}
+            runner.api.collections().get().execute.return_value = {"portable_data_hash": "99999999999999999999999999999993+99"}
+            # Simulate reused job from another project so that we can check is a can_read
+            # link is added.
+            runner.api.jobs().create().execute.return_value = {
+                'state': 'Complete' if enable_reuse else 'Queued',
+                'owner_uuid': 'zzzzz-tpzed-yyyyyyyyyyyyyyy' if enable_reuse else 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
+                'uuid': 'zzzzz-819sb-yyyyyyyyyyyyyyy',
+                'output': None,
+            }
 
             tool = cmap({
                 "inputs": [],
@@ -75,6 +83,17 @@ class TestJob(unittest.TestCase):
                              ['script_version', 'in git', 'a3f2cb186e437bfce0031b024b2157b73ed2717d'],
                              ['docker_image_locator', 'in docker', 'arvados/jobs']]
                 )
+                if enable_reuse:
+                    runner.api.links().create.assert_called_with(
+                        body=JsonDiffMatcher({
+                            'link_class': 'permission',
+                            'name': 'can_read',
+                            "tail_uuid": "zzzzz-8i9sb-zzzzzzzzzzzzzzz",
+                            "head_uuid": "zzzzz-819sb-yyyyyyyyyyyyyyy",
+                        })
+                    )
+                else:
+                    assert not runner.api.links().create.called
 
     # The test passes some fields in builder.resources
     # For the remaining fields, the defaults will apply: {'cores': 1, 'ram': 1024, 'outdirSize': 1024, 'tmpdirSize': 1024}
@@ -161,7 +180,9 @@ class TestJob(unittest.TestCase):
 2016-11-02_23:12:18 c97qk-8i9sb-cryqw2blvzy4yaj 13358 0 stderr 2016/11/02 23:12:18 crunchrunner: $(task.keep)=/keep
         """)
         api.collections().list().execute.side_effect = ({"items": []},
-                                                        {"items": [{"manifest_text": "XYZ"}]})
+                                                        {"items": [{"manifest_text": "XYZ"}]},
+                                                        {"items": []},
+                                                        {"items": [{"manifest_text": "ABC"}]})
 
         arvjob = arvados_cwl.ArvadosJob(runner)
         arvjob.name = "testjob"
@@ -179,20 +200,37 @@ class TestJob(unittest.TestCase):
 
         api.collections().list.assert_has_calls([
             mock.call(),
+            # Output collection check
             mock.call(filters=[['owner_uuid', '=', 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'],
                           ['portable_data_hash', '=', '99999999999999999999999999999993+99'],
                           ['name', '=', 'Output 9999999 of testjob']]),
             mock.call().execute(num_retries=0),
             mock.call(limit=1, filters=[['portable_data_hash', '=', '99999999999999999999999999999993+99']],
                  select=['manifest_text']),
+            mock.call().execute(num_retries=0),
+            # Log collection's turn
+            mock.call(filters=[['owner_uuid', '=', 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'],
+                          ['portable_data_hash', '=', '99999999999999999999999999999994+99'],
+                          ['name', '=', 'Log of zzzzz-8i9sb-zzzzzzzzzzzzzzz']]),
+            mock.call().execute(num_retries=0),
+            mock.call(limit=1, filters=[['portable_data_hash', '=', '99999999999999999999999999999994+99']],
+                 select=['manifest_text']),
             mock.call().execute(num_retries=0)])
 
-        api.collections().create.assert_called_with(
-            ensure_unique_name=True,
-            body={'portable_data_hash': '99999999999999999999999999999993+99',
-                  'manifest_text': 'XYZ',
-                  'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
-                  'name': 'Output 9999999 of testjob'})
+        api.collections().create.assert_has_calls([
+            mock.call(ensure_unique_name=True,
+                      body={'portable_data_hash': '99999999999999999999999999999993+99',
+                            'manifest_text': 'XYZ',
+                            'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
+                            'name': 'Output 9999999 of testjob'}),
+            mock.call().execute(num_retries=0),
+            mock.call(ensure_unique_name=True,
+                      body={'portable_data_hash': '99999999999999999999999999999994+99',
+                            'manifest_text': 'ABC',
+                            'owner_uuid': 'zzzzz-8i9sb-zzzzzzzzzzzzzzz',
+                            'name': 'Log of zzzzz-8i9sb-zzzzzzzzzzzzzzz'}),
+            mock.call().execute(num_retries=0),
+        ])
 
         arvjob.output_callback.assert_called_with({"out": "stuff"}, "success")
 
@@ -211,7 +249,10 @@ class TestJob(unittest.TestCase):
 2016-11-02_23:12:18 c97qk-8i9sb-cryqw2blvzy4yaj 13358 0 stderr 2016/11/02 23:12:18 crunchrunner: $(task.keep)=/keep
         """)
 
-        api.collections().list().execute.side_effect = ({"items": [{"uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz2"}]},)
+        api.collections().list().execute.side_effect = (
+            {"items": [{"uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz2"}]},
+            {"items": [{"uuid": "zzzzz-4zz18-zzzzzzzzzzzzzz2"}]},
+        )
 
         arvjob = arvados_cwl.ArvadosJob(runner)
         arvjob.name = "testjob"
@@ -229,10 +270,17 @@ class TestJob(unittest.TestCase):
 
         api.collections().list.assert_has_calls([
             mock.call(),
+            # Output collection
             mock.call(filters=[['owner_uuid', '=', 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'],
                                ['portable_data_hash', '=', '99999999999999999999999999999993+99'],
                                ['name', '=', 'Output 9999999 of testjob']]),
-            mock.call().execute(num_retries=0)])
+            mock.call().execute(num_retries=0),
+            # Log collection
+            mock.call(filters=[['owner_uuid', '=', 'zzzzz-8i9sb-zzzzzzzzzzzzzzz'],
+                               ['portable_data_hash', '=', '99999999999999999999999999999994+99'],
+                               ['name', '=', 'Log of zzzzz-8i9sb-zzzzzzzzzzzzzzz']]),
+            mock.call().execute(num_retries=0)
+        ])
 
         self.assertFalse(api.collections().create.called)
 
diff --git a/sdk/go/arvados/collection_fs.go b/sdk/go/arvados/collection_fs.go
new file mode 100644 (file)
index 0000000..83ed192
--- /dev/null
@@ -0,0 +1,235 @@
+package arvados
+
+import (
+       "io"
+       "net/http"
+       "os"
+       "path"
+       "strings"
+       "time"
+
+       "git.curoverse.com/arvados.git/sdk/go/manifest"
+)
+
+type File interface {
+       io.Reader
+       io.Closer
+       io.Seeker
+       Size() int64
+}
+
+type keepClient interface {
+       ManifestFileReader(manifest.Manifest, string) (File, error)
+}
+
+type collectionFile struct {
+       File
+       collection *Collection
+       name       string
+       size       int64
+}
+
+func (cf *collectionFile) Size() int64 {
+       return cf.size
+}
+
+func (cf *collectionFile) Readdir(count int) ([]os.FileInfo, error) {
+       return nil, io.EOF
+}
+
+func (cf *collectionFile) Stat() (os.FileInfo, error) {
+       return collectionDirent{
+               collection: cf.collection,
+               name:       cf.name,
+               size:       cf.size,
+               isDir:      false,
+       }, nil
+}
+
+type collectionDir struct {
+       collection *Collection
+       stream     string
+       dirents    []os.FileInfo
+}
+
+// Readdir implements http.File.
+func (cd *collectionDir) Readdir(count int) ([]os.FileInfo, error) {
+       ret := cd.dirents
+       if count <= 0 {
+               cd.dirents = nil
+               return ret, nil
+       } else if len(ret) == 0 {
+               return nil, io.EOF
+       }
+       var err error
+       if count >= len(ret) {
+               count = len(ret)
+               err = io.EOF
+       }
+       cd.dirents = cd.dirents[count:]
+       return ret[:count], err
+}
+
+// Stat implements http.File.
+func (cd *collectionDir) Stat() (os.FileInfo, error) {
+       return collectionDirent{
+               collection: cd.collection,
+               name:       path.Base(cd.stream),
+               isDir:      true,
+               size:       int64(len(cd.dirents)),
+       }, nil
+}
+
+// Close implements http.File.
+func (cd *collectionDir) Close() error {
+       return nil
+}
+
+// Read implements http.File.
+func (cd *collectionDir) Read([]byte) (int, error) {
+       return 0, nil
+}
+
+// Seek implements http.File.
+func (cd *collectionDir) Seek(int64, int) (int64, error) {
+       return 0, nil
+}
+
+// collectionDirent implements os.FileInfo.
+type collectionDirent struct {
+       collection *Collection
+       name       string
+       isDir      bool
+       mode       os.FileMode
+       size       int64
+}
+
+// Name implements os.FileInfo.
+func (e collectionDirent) Name() string {
+       return e.name
+}
+
+// ModTime implements os.FileInfo.
+func (e collectionDirent) ModTime() time.Time {
+       if e.collection.ModifiedAt == nil {
+               return time.Now()
+       }
+       return *e.collection.ModifiedAt
+}
+
+// Mode implements os.FileInfo.
+func (e collectionDirent) Mode() os.FileMode {
+       if e.isDir {
+               return 0555
+       } else {
+               return 0444
+       }
+}
+
+// IsDir implements os.FileInfo.
+func (e collectionDirent) IsDir() bool {
+       return e.isDir
+}
+
+// Size implements os.FileInfo.
+func (e collectionDirent) Size() int64 {
+       return e.size
+}
+
+// Sys implements os.FileInfo.
+func (e collectionDirent) Sys() interface{} {
+       return nil
+}
+
+// collectionFS implements http.FileSystem.
+type collectionFS struct {
+       collection *Collection
+       client     *Client
+       kc         keepClient
+}
+
+// FileSystem returns an http.FileSystem for the collection.
+func (c *Collection) FileSystem(client *Client, kc keepClient) http.FileSystem {
+       return &collectionFS{
+               collection: c,
+               client:     client,
+               kc:         kc,
+       }
+}
+
+func (c *collectionFS) Open(name string) (http.File, error) {
+       // Ensure name looks the way it does in a manifest.
+       name = path.Clean("/" + name)
+       if name == "/" || name == "./" {
+               name = "."
+       } else if strings.HasPrefix(name, "/") {
+               name = "." + name
+       }
+
+       m := manifest.Manifest{Text: c.collection.ManifestText}
+
+       filesizes := c.fileSizes()
+
+       // Return a file if it exists.
+       if size, ok := filesizes[name]; ok {
+               reader, err := c.kc.ManifestFileReader(m, name)
+               if err != nil {
+                       return nil, err
+               }
+               return &collectionFile{
+                       File:       reader,
+                       collection: c.collection,
+                       name:       path.Base(name),
+                       size:       size,
+               }, nil
+       }
+
+       // Return a directory if it's the root dir or there are file
+       // entries below it.
+       children := map[string]collectionDirent{}
+       for fnm, size := range filesizes {
+               if !strings.HasPrefix(fnm, name+"/") {
+                       continue
+               }
+               isDir := false
+               ent := fnm[len(name)+1:]
+               if i := strings.Index(ent, "/"); i >= 0 {
+                       ent = ent[:i]
+                       isDir = true
+               }
+               e := children[ent]
+               e.collection = c.collection
+               e.isDir = isDir
+               e.name = ent
+               e.size = size
+               children[ent] = e
+       }
+       if len(children) == 0 && name != "." {
+               return nil, os.ErrNotExist
+       }
+       dirents := make([]os.FileInfo, 0, len(children))
+       for _, ent := range children {
+               dirents = append(dirents, ent)
+       }
+       return &collectionDir{
+               collection: c.collection,
+               stream:     name,
+               dirents:    dirents,
+       }, nil
+}
+
+// fileSizes returns a map of files that can be opened. Each key
+// starts with "./".
+func (c *collectionFS) fileSizes() map[string]int64 {
+       var sizes map[string]int64
+       m := manifest.Manifest{Text: c.collection.ManifestText}
+       for ms := range m.StreamIter() {
+               for _, fss := range ms.FileStreamSegments {
+                       if sizes == nil {
+                               sizes = map[string]int64{}
+                       }
+                       sizes[ms.StreamName+"/"+fss.Name] += int64(fss.SegLen)
+               }
+       }
+       return sizes
+}
diff --git a/sdk/go/arvados/collection_fs_test.go b/sdk/go/arvados/collection_fs_test.go
new file mode 100644 (file)
index 0000000..7e8588b
--- /dev/null
@@ -0,0 +1,125 @@
+package arvados
+
+import (
+       "io"
+       "net/http"
+       "os"
+       "testing"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       check "gopkg.in/check.v1"
+)
+
+var _ = check.Suite(&CollectionFSSuite{})
+
+type CollectionFSSuite struct {
+       client *Client
+       coll   Collection
+       fs     http.FileSystem
+}
+
+func (s *CollectionFSSuite) SetUpTest(c *check.C) {
+       s.client = NewClientFromEnv()
+       err := s.client.RequestAndDecode(&s.coll, "GET", "arvados/v1/collections/"+arvadostest.FooAndBarFilesInDirUUID, nil, nil)
+       c.Assert(err, check.IsNil)
+       s.fs = s.coll.FileSystem(s.client, nil)
+}
+
+func (s *CollectionFSSuite) TestReaddirFull(c *check.C) {
+       f, err := s.fs.Open("/dir1")
+       c.Assert(err, check.IsNil)
+
+       st, err := f.Stat()
+       c.Assert(err, check.IsNil)
+       c.Check(st.Size(), check.Equals, int64(2))
+       c.Check(st.IsDir(), check.Equals, true)
+
+       fis, err := f.Readdir(0)
+       c.Check(err, check.IsNil)
+       c.Check(len(fis), check.Equals, 2)
+       if len(fis) > 0 {
+               c.Check(fis[0].Size(), check.Equals, int64(3))
+       }
+}
+
+func (s *CollectionFSSuite) TestReaddirLimited(c *check.C) {
+       f, err := s.fs.Open("./dir1")
+       c.Assert(err, check.IsNil)
+
+       fis, err := f.Readdir(1)
+       c.Check(err, check.IsNil)
+       c.Check(len(fis), check.Equals, 1)
+       if len(fis) > 0 {
+               c.Check(fis[0].Size(), check.Equals, int64(3))
+       }
+
+       fis, err = f.Readdir(1)
+       c.Check(err, check.Equals, io.EOF)
+       c.Check(len(fis), check.Equals, 1)
+       if len(fis) > 0 {
+               c.Check(fis[0].Size(), check.Equals, int64(3))
+       }
+
+       fis, err = f.Readdir(1)
+       c.Check(len(fis), check.Equals, 0)
+       c.Check(err, check.NotNil)
+       c.Check(err, check.Equals, io.EOF)
+
+       f, err = s.fs.Open("dir1")
+       c.Assert(err, check.IsNil)
+       fis, err = f.Readdir(1)
+       c.Check(len(fis), check.Equals, 1)
+       c.Assert(err, check.IsNil)
+       fis, err = f.Readdir(2)
+       c.Check(len(fis), check.Equals, 1)
+       c.Assert(err, check.Equals, io.EOF)
+       fis, err = f.Readdir(2)
+       c.Check(len(fis), check.Equals, 0)
+       c.Assert(err, check.Equals, io.EOF)
+}
+
+func (s *CollectionFSSuite) TestPathMunge(c *check.C) {
+       for _, path := range []string{".", "/", "./", "///", "/../", "/./.."} {
+               f, err := s.fs.Open(path)
+               c.Assert(err, check.IsNil)
+
+               st, err := f.Stat()
+               c.Assert(err, check.IsNil)
+               c.Check(st.Size(), check.Equals, int64(1))
+               c.Check(st.IsDir(), check.Equals, true)
+       }
+       for _, path := range []string{"/dir1", "dir1", "./dir1", "///dir1//.//", "../dir1/../dir1/"} {
+               c.Logf("%q", path)
+               f, err := s.fs.Open(path)
+               c.Assert(err, check.IsNil)
+
+               st, err := f.Stat()
+               c.Assert(err, check.IsNil)
+               c.Check(st.Size(), check.Equals, int64(2))
+               c.Check(st.IsDir(), check.Equals, true)
+       }
+}
+
+func (s *CollectionFSSuite) TestNotExist(c *check.C) {
+       for _, path := range []string{"/no", "no", "./no", "n/o", "/n/o"} {
+               f, err := s.fs.Open(path)
+               c.Assert(f, check.IsNil)
+               c.Assert(err, check.NotNil)
+               c.Assert(os.IsNotExist(err), check.Equals, true)
+       }
+}
+
+func (s *CollectionFSSuite) TestOpenFile(c *check.C) {
+       c.Skip("cannot test files with nil keepclient")
+
+       f, err := s.fs.Open("/foo.txt")
+       c.Assert(err, check.IsNil)
+       st, err := f.Stat()
+       c.Assert(err, check.IsNil)
+       c.Check(st.Size(), check.Equals, int64(3))
+}
+
+// Gocheck boilerplate
+func Test(t *testing.T) {
+       check.TestingT(t)
+}
index 299d18638a0fda75d5f170ca8609c5e1c0e5f7e1..7e21da4982b3ecb2325f3117008e15a7a8513a7d 100644 (file)
@@ -18,6 +18,9 @@ const (
        FooPdh                  = "1f4b0bc7583c2a7f9102c395f4ffc5e3+45"
        HelloWorldPdh           = "55713e6a34081eb03609e7ad5fcad129+62"
 
+       FooAndBarFilesInDirUUID = "zzzzz-4zz18-foonbarfilesdir"
+       FooAndBarFilesInDirPDH  = "6bbac24198d09a93975f60098caf0bdf+62"
+
        Dispatch1Token    = "kwi8oowusvbutahacwk2geulqewy5oaqmpalczfna4b6bb0hfw"
        Dispatch1AuthUUID = "zzzzz-gj3su-k9dvestay1plssr"
 
index 344a70c50bf09798e27bb41187fbdcb4d8d35b6e..527318eb49c67046ca86bf2183eceb75d3bcc157 100644 (file)
@@ -6,19 +6,10 @@ import (
        "io"
        "os"
 
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/manifest"
 )
 
-// A Reader implements, io.Reader, io.Seeker, and io.Closer, and has a
-// Len() method that returns the total number of bytes available to
-// read.
-type Reader interface {
-       io.Reader
-       io.Seeker
-       io.Closer
-       Len() uint64
-}
-
 const (
        // After reading a data block from Keep, cfReader slices it up
        // and sends the slices to a buffered channel to be consumed
@@ -38,7 +29,7 @@ var ErrNoManifest = errors.New("Collection has no manifest")
 // CollectionFileReader returns a Reader that reads content from a single file
 // in the collection. The filename must be relative to the root of the
 // collection.  A leading prefix of "/" or "./" in the filename is ignored.
-func (kc *KeepClient) CollectionFileReader(collection map[string]interface{}, filename string) (Reader, error) {
+func (kc *KeepClient) CollectionFileReader(collection map[string]interface{}, filename string) (arvados.File, error) {
        mText, ok := collection["manifest_text"].(string)
        if !ok {
                return nil, ErrNoManifest
@@ -47,7 +38,7 @@ func (kc *KeepClient) CollectionFileReader(collection map[string]interface{}, fi
        return kc.ManifestFileReader(m, filename)
 }
 
-func (kc *KeepClient) ManifestFileReader(m manifest.Manifest, filename string) (Reader, error) {
+func (kc *KeepClient) ManifestFileReader(m manifest.Manifest, filename string) (arvados.File, error) {
        f := &file{
                kc: kc,
        }
@@ -164,9 +155,9 @@ func (f *file) Seek(offset int64, whence int) (int64, error) {
        return f.offset, nil
 }
 
-// Len returns the file size in bytes.
-func (f *file) Len() uint64 {
-       return uint64(f.size)
+// Size returns the file size in bytes.
+func (f *file) Size() int64 {
+       return f.size
 }
 
 func (f *file) load(m manifest.Manifest, path string) error {
index 4aab5d56c6541bdfb8eef6843248ca54a7dda7a6..ba7ff2bbd9f62af754bb59762d17342eb1484441 100644 (file)
@@ -40,7 +40,9 @@ upload_opts.add_argument('--version', action='version',
                          help='Print version and exit.')
 upload_opts.add_argument('paths', metavar='path', type=str, nargs='*',
                          help="""
-Local file or directory. Default: read from standard input.
+Local file or directory. If path is a directory reference with a trailing
+slash, then just upload the directory's contents; otherwise upload the
+directory itself. Default: read from standard input.
 """)
 
 _group = upload_opts.add_mutually_exclusive_group()
@@ -451,9 +453,17 @@ class ArvPutUploadJob(object):
                 elif os.path.isdir(path):
                     # Use absolute paths on cache index so CWD doesn't interfere
                     # with the caching logic.
-                    prefixdir = path = os.path.abspath(path)
-                    if prefixdir != '/':
-                        prefixdir += '/'
+                    orig_path = path
+                    path = os.path.abspath(path)
+                    if orig_path[-1:] == os.sep:
+                        # When passing a directory reference with a trailing slash,
+                        # its contents should be uploaded directly to the collection's root.
+                        prefixdir = path
+                    else:
+                        # When passing a directory reference with no trailing slash,
+                        # upload the directory to the collection's root.
+                        prefixdir = os.path.dirname(path)
+                    prefixdir += os.sep
                     for root, dirs, files in os.walk(path, followlinks=self.follow_links):
                         root_relpath = os.path.relpath(root, path)
                         # Exclude files/dirs by full path matching pattern
index 6d103526f40ebc1b8de64cdf2956f1867cdcf538..c2eaf12bbb048f24239839da2322fd0be69815c6 100644 (file)
@@ -791,7 +791,8 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers,
         datadir = self.make_tmpdir()
         with open(os.path.join(datadir, "foo"), "w") as f:
             f.write("The quick brown fox jumped over the lazy dog")
-        p = subprocess.Popen([sys.executable, arv_put.__file__, datadir],
+        p = subprocess.Popen([sys.executable, arv_put.__file__,
+                              os.path.join(datadir, 'foo')],
                              stdout=subprocess.PIPE,
                              stderr=subprocess.PIPE,
                              env=self.ENVIRON)
@@ -838,7 +839,40 @@ class ArvPutIntegrationTest(run_test_server.TestCaseWithServers,
         self.assertEqual(col['uuid'], updated_col['uuid'])
         # Get the manifest and check that the new file is being included
         c = arv_put.api_client.collections().get(uuid=updated_col['uuid']).execute()
-        self.assertRegex(c['manifest_text'], r'^\. .*:44:file2\n')
+        self.assertRegex(c['manifest_text'], r'^\..* .*:44:file2\n')
+
+    def test_upload_directory_reference_without_trailing_slash(self):
+        tmpdir1 = self.make_tmpdir()
+        tmpdir2 = self.make_tmpdir()
+        with open(os.path.join(tmpdir1, 'foo'), 'w') as f:
+            f.write('This is foo')
+        with open(os.path.join(tmpdir2, 'bar'), 'w') as f:
+            f.write('This is not foo')
+        # Upload one directory and one file
+        col = self.run_and_find_collection("", ['--no-progress',
+                                                tmpdir1,
+                                                os.path.join(tmpdir2, 'bar')])
+        self.assertNotEqual(None, col['uuid'])
+        c = arv_put.api_client.collections().get(uuid=col['uuid']).execute()
+        # Check that 'foo' was written inside a subcollection
+        # OTOH, 'bar' should have been directly uploaded on the root collection
+        self.assertRegex(c['manifest_text'], r'^\. .*:15:bar\n\./.+ .*:11:foo\n')
+
+    def test_upload_directory_reference_with_trailing_slash(self):
+        tmpdir1 = self.make_tmpdir()
+        tmpdir2 = self.make_tmpdir()
+        with open(os.path.join(tmpdir1, 'foo'), 'w') as f:
+            f.write('This is foo')
+        with open(os.path.join(tmpdir2, 'bar'), 'w') as f:
+            f.write('This is not foo')
+        # Upload one directory (with trailing slash) and one file
+        col = self.run_and_find_collection("", ['--no-progress',
+                                                tmpdir1 + os.sep,
+                                                os.path.join(tmpdir2, 'bar')])
+        self.assertNotEqual(None, col['uuid'])
+        c = arv_put.api_client.collections().get(uuid=col['uuid']).execute()
+        # Check that 'foo' and 'bar' were written at the same level
+        self.assertRegex(c['manifest_text'], r'^\. .*:15:bar .*:11:foo\n')
 
     def test_put_collection_with_high_redundancy(self):
         # Write empty data: we're not testing CollectionWriter, just
index 183ed4d8a80e269356c82cdf19b08d5dc0120a80..a2c2528b905ae69d630d13e1ea7df1f000db03b6 100644 (file)
@@ -4,15 +4,13 @@ class Arvados::V1::RepositoriesController < ApplicationController
   before_filter :admin_required, :only => :get_all_permissions
 
   def get_all_permissions
-    # users is a map of {user_uuid => User object}
-    users = {}
     # user_aks is a map of {user_uuid => array of public keys}
     user_aks = {}
     # admins is an array of user_uuids
     admins = []
-    User.eager_load(:authorized_keys).find_each do |u|
-      next unless u.is_active or u.uuid == anonymous_user_uuid
-      users[u.uuid] = u
+    User.
+      where('users.is_active = ? or users.uuid = ?', true, anonymous_user_uuid).
+      eager_load(:authorized_keys).find_each do |u|
       user_aks[u.uuid] = u.authorized_keys.collect do |ak|
         {
           public_key: ak.public_key,
@@ -21,6 +19,7 @@ class Arvados::V1::RepositoriesController < ApplicationController
       end
       admins << u.uuid if u.is_admin
     end
+    all_group_permissions = User.all_group_permissions
     @repo_info = {}
     Repository.eager_load(:permissions).find_each do |repo|
       @repo_info[repo.uuid] = {
@@ -42,8 +41,8 @@ class Arvados::V1::RepositoriesController < ApplicationController
           # A group has permission. Each user who has access to this
           # group also has access to the repository. Access level is
           # min(group-to-repo permission, user-to-group permission).
-          users.each do |user_uuid, user|
-            perm_mask = user.group_permissions[perm.tail_uuid]
+          user_aks.each do |user_uuid, _|
+            perm_mask = all_group_permissions[user_uuid][perm.tail_uuid]
             if not perm_mask
               next
             elsif perm_mask[:manage] and perm.name == 'can_manage'
@@ -54,7 +53,7 @@ class Arvados::V1::RepositoriesController < ApplicationController
               evidence << {name: 'can_read', user_uuid: user_uuid}
             end
           end
-        elsif users[perm.tail_uuid]
+        elsif user_aks.has_key?(perm.tail_uuid)
           # A user has permission; the user exists; and either the
           # user is active, or it's the special case of the anonymous
           # user which is never "active" but is allowed to read
@@ -66,7 +65,7 @@ class Arvados::V1::RepositoriesController < ApplicationController
       ([repo.owner_uuid] | admins).each do |user_uuid|
         # Except: no permissions for inactive users, even if they own
         # repositories.
-        next unless users[user_uuid]
+        next unless user_aks.has_key?(user_uuid)
         evidence << {name: 'can_manage', user_uuid: user_uuid}
       end
       # Distill all the evidence about permissions on this repository
index 7a1f69991117118f188f1516a313a38432662bd4..96dfaaddd300e5466050ed91eb87e233fdd02420 100644 (file)
@@ -64,36 +64,19 @@ class Arvados::V1::UsersController < ApplicationController
 
   # create user object and all the needed links
   def setup
-    @object = nil
     if params[:uuid]
-      @object = User.find_by_uuid params[:uuid]
+      @object = User.find_by_uuid(params[:uuid])
       if !@object
         return render_404_if_no_object
       end
-      object_found = true
+    elsif !params[:user]
+      raise ArgumentError.new "Required uuid or user"
+    elsif !params[:user]['email']
+      raise ArgumentError.new "Require user email"
+    elsif !params[:openid_prefix]
+      raise ArgumentError.new "Required openid_prefix parameter is missing."
     else
-      if !params[:user]
-        raise ArgumentError.new "Required uuid or user"
-      else
-        if params[:user]['uuid']
-          @object = User.find_by_uuid params[:user]['uuid']
-          if @object
-            object_found = true
-          end
-        end
-
-        if !@object
-          if !params[:user]['email']
-            raise ArgumentError.new "Require user email"
-          end
-
-          if !params[:openid_prefix]
-            raise ArgumentError.new "Required openid_prefix parameter is missing."
-          end
-
-          @object = model_class.create! resource_attrs
-        end
-      end
+      @object = model_class.create! resource_attrs
     end
 
     # It's not always possible for the client to know the user's
@@ -106,23 +89,18 @@ class Arvados::V1::UsersController < ApplicationController
     elsif @object.username.nil?
       raise ArgumentError.
         new("cannot setup a repository because user has no username")
-    elsif object_found and
-        params[:repo_name].start_with?("#{@object.username}/")
+    elsif params[:repo_name].index("/")
       full_repo_name = params[:repo_name]
     else
       full_repo_name = "#{@object.username}/#{params[:repo_name]}"
     end
 
-    if object_found
-      @response = @object.setup_repo_vm_links full_repo_name,
-                    params[:vm_uuid], params[:openid_prefix]
-    else
-      @response = User.setup @object, params[:openid_prefix],
-                    full_repo_name, params[:vm_uuid]
-    end
+    @response = @object.setup(repo_name: full_repo_name,
+                              vm_uuid: params[:vm_uuid],
+                              openid_prefix: params[:openid_prefix])
 
     # setup succeeded. send email to user
-    if params[:send_notification_email] == true || params[:send_notification_email] == 'true'
+    if params[:send_notification_email]
       UserNotifier.account_is_setup(@object).deliver_now
     end
 
index 82ea0acbd63d9b12e2b31cd4d35fec783b869b59..8eb40f9fbb7d9e96988a233b1436f491ef665cff 100644 (file)
@@ -104,17 +104,19 @@ class Node < ArvadosModel
 
     # Assign slot_number
     if self.slot_number.nil?
-      try_slot = 1
-      begin
-        self.slot_number = try_slot
+      while true
+        n = self.class.available_slot_number
+        if n.nil?
+          raise "No available node slots"
+        end
+        self.slot_number = n
         begin
           self.save!
           break
         rescue ActiveRecord::RecordNotUnique
-          try_slot += 1
+          # try again
         end
-        raise "No available node slots" if try_slot == Rails.configuration.max_compute_nodes
-      end while true
+      end
     end
 
     # Assign hostname
@@ -136,6 +138,21 @@ class Node < ArvadosModel
 
   protected
 
+  def self.available_slot_number
+    # Join the sequence 1..max with the nodes table. Return the first
+    # (i.e., smallest) value that doesn't match the slot_number of any
+    # existing node.
+    connection.exec_query('SELECT n FROM generate_series(1, $1) AS slot(n)
+                          LEFT JOIN nodes ON n=slot_number
+                          WHERE slot_number IS NULL
+                          LIMIT 1',
+                          # query label:
+                          'Node.available_slot_number',
+                          # [col_id, val] for $1 vars:
+                          [[nil, Rails.configuration.max_compute_nodes]],
+                         ).rows.first.andand.first
+  end
+
   def ensure_ping_secret
     self.info['ping_secret'] ||= rand(2**256).to_s(36)
   end
index d944474712708b3207f28807e10349b8f34007b0..bca1eef7261f97ceabf6e95c828fb07aaae167b2 100644 (file)
@@ -20,6 +20,7 @@ class User < ArvadosModel
   before_update :verify_repositories_empty, :if => Proc.new { |user|
     user.username.nil? and user.username_changed?
   }
+  before_update :setup_on_activate
   before_create :check_auto_admin
   before_create :set_initial_username, :if => Proc.new { |user|
     user.username.nil? and user.email
@@ -140,11 +141,29 @@ class User < ArvadosModel
     end
   end
 
+  # Return a hash of {user_uuid: group_perms}
+  def self.all_group_permissions
+    install_view('permission')
+    all_perms = {}
+    ActiveRecord::Base.connection.
+      exec_query('SELECT user_uuid, target_owner_uuid, max(perm_level)
+                  FROM permission_view
+                  WHERE target_owner_uuid IS NOT NULL
+                  GROUP BY user_uuid, target_owner_uuid',
+                  # "name" arg is a query label that appears in logs:
+                  "all_group_permissions",
+                  ).rows.each do |user_uuid, group_uuid, max_p_val|
+      all_perms[user_uuid] ||= {}
+      all_perms[user_uuid][group_uuid] = PERMS_FOR_VAL[max_p_val.to_i]
+    end
+    all_perms
+  end
+
   # Return a hash of {group_uuid: perm_hash} where perm_hash[:read]
   # and perm_hash[:write] are true if this user can read and write
   # objects owned by group_uuid.
   def calculate_group_permissions
-    install_view('permission')
+    self.class.install_view('permission')
 
     group_perms = {}
     ActiveRecord::Base.connection.
@@ -182,15 +201,11 @@ class User < ArvadosModel
     r
   end
 
-  def self.setup(user, openid_prefix, repo_name=nil, vm_uuid=nil)
-    return user.setup_repo_vm_links(repo_name, vm_uuid, openid_prefix)
-  end
-
   # create links
-  def setup_repo_vm_links(repo_name, vm_uuid, openid_prefix)
+  def setup(openid_prefix:, repo_name: nil, vm_uuid: nil)
     oid_login_perm = create_oid_login_perm openid_prefix
     repo_perm = create_user_repo_link repo_name
-    vm_login_perm = create_vm_login_permission_link vm_uuid, username
+    vm_login_perm = create_vm_login_permission_link(vm_uuid, username) if vm_uuid
     group_perm = create_user_group_link
 
     return [oid_login_perm, repo_perm, vm_login_perm, group_perm, self].compact
@@ -364,13 +379,12 @@ class User < ArvadosModel
     merged
   end
 
-  def create_oid_login_perm (openid_prefix)
-    login_perm_props = { "identity_url_prefix" => openid_prefix}
-
+  def create_oid_login_perm(openid_prefix)
     # Check oid_login_perm
     oid_login_perms = Link.where(tail_uuid: self.email,
-                                   link_class: 'permission',
-                                   name: 'can_login').where("head_uuid = ?", self.uuid)
+                                 head_uuid: self.uuid,
+                                 link_class: 'permission',
+                                 name: 'can_login')
 
     if !oid_login_perms.any?
       # create openid login permission
@@ -378,8 +392,9 @@ class User < ArvadosModel
                                    name: 'can_login',
                                    tail_uuid: self.email,
                                    head_uuid: self.uuid,
-                                   properties: login_perm_props
-                                  )
+                                   properties: {
+                                     "identity_url_prefix" => openid_prefix,
+                                   })
       logger.info { "openid login permission: " + oid_login_perm[:uuid] }
     else
       oid_login_perm = oid_login_perms.first
@@ -407,15 +422,12 @@ class User < ArvadosModel
   # create login permission for the given vm_uuid, if it does not already exist
   def create_vm_login_permission_link(vm_uuid, repo_name)
     # vm uuid is optional
-    if vm_uuid
-      vm = VirtualMachine.where(uuid: vm_uuid).first
+    return if !vm_uuid
 
-      if not vm
-        logger.warn "Could not find virtual machine for #{vm_uuid.inspect}"
-        raise "No vm found for #{vm_uuid}"
-      end
-    else
-      return
+    vm = VirtualMachine.where(uuid: vm_uuid).first
+    if !vm
+      logger.warn "Could not find virtual machine for #{vm_uuid.inspect}"
+      raise "No vm found for #{vm_uuid}"
     end
 
     logger.info { "vm uuid: " + vm[:uuid] }
@@ -468,9 +480,17 @@ class User < ArvadosModel
     end
   end
 
+  # Automatically setup if is_active flag turns on
+  def setup_on_activate
+    return if [system_user_uuid, anonymous_user_uuid].include?(self.uuid)
+    if is_active && (new_record? || is_active_changed?)
+      setup(openid_prefix: Rails.configuration.default_openid_prefix)
+    end
+  end
+
   # Automatically setup new user during creation
   def auto_setup_new_user
-    setup_repo_vm_links(nil, nil, Rails.configuration.default_openid_prefix)
+    setup(openid_prefix: Rails.configuration.default_openid_prefix)
     if username
       create_vm_login_permission_link(Rails.configuration.auto_setup_new_users_with_vm_uuid,
                                       username)
index e9f016dc051a06ec09f9aa071ab98fc0a15aa236..4375d775350cd9001353231339ec38e3e5d33c86 100644 (file)
@@ -4,6 +4,8 @@
 module CanBeAnOwner
 
   def self.included(base)
+    base.extend(ClassMethods)
+
     # Rails' "has_many" can prevent us from destroying the owner
     # record when other objects refer to it.
     ActiveRecord::Base.connection.tables.each do |t|
@@ -22,8 +24,28 @@ module CanBeAnOwner
     base.validate :restrict_uuid_change_breaking_associations
   end
 
+  module ClassMethods
+    def install_view(type)
+      conn = ActiveRecord::Base.connection
+      transaction do
+        # Check whether the temporary view has already been created
+        # during this connection. If not, create it.
+        conn.exec_query "SAVEPOINT check_#{type}_view"
+        begin
+          conn.exec_query("SELECT 1 FROM #{type}_view LIMIT 0")
+        rescue
+          conn.exec_query "ROLLBACK TO SAVEPOINT check_#{type}_view"
+          sql = File.read(Rails.root.join("lib", "create_#{type}_view.sql"))
+          conn.exec_query(sql)
+        ensure
+          conn.exec_query "RELEASE SAVEPOINT check_#{type}_view"
+        end
+      end
+    end
+  end
+
   def descendant_project_uuids
-    install_view('ancestor')
+    self.class.install_view('ancestor')
     ActiveRecord::Base.connection.
       exec_query('SELECT ancestor_view.uuid
                   FROM ancestor_view
@@ -59,22 +81,4 @@ module CanBeAnOwner
       self.owner_uuid = uuid
     end
   end
-
-  def install_view(type)
-    conn = ActiveRecord::Base.connection
-    self.class.transaction do
-      # Check whether the temporary view has already been created
-      # during this connection. If not, create it.
-      conn.exec_query "SAVEPOINT check_#{type}_view"
-      begin
-        conn.exec_query("SELECT 1 FROM #{type}_view LIMIT 0")
-      rescue
-        conn.exec_query "ROLLBACK TO SAVEPOINT check_#{type}_view"
-        sql = File.read(Rails.root.join("lib", "create_#{type}_view.sql"))
-        conn.exec_query(sql)
-      ensure
-        conn.exec_query "RELEASE SAVEPOINT check_#{type}_view"
-      end
-    end
-  end
 end
index f98e482dd84a190fb08f3e606be45835119f986f..da9c595e8d186189f9a68ef1cda91519e7a88a09 100644 (file)
@@ -214,26 +214,6 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
         @vm_uuid, resp_obj['uuid'], 'arvados#virtualMachine', false, 'VirtualMachine'
   end
 
-  test "invoke setup with existing uuid in user, verify response" do
-    authorize_with :admin
-    inactive_user = users(:inactive)
-
-    post :setup, {
-      user: {uuid: inactive_user['uuid']},
-      openid_prefix: 'https://www.google.com/accounts/o8/id'
-    }
-
-    assert_response :success
-
-    response_items = JSON.parse(@response.body)['items']
-    resp_obj = find_obj_in_resp response_items, 'User', nil
-
-    assert_not_nil resp_obj['uuid'], 'expected uuid for the new user'
-    assert_equal inactive_user['uuid'], resp_obj['uuid']
-    assert_equal inactive_user['email'], resp_obj['email'],
-        'expecting inactive user email'
-  end
-
   test "invoke setup with existing uuid but different email, expect original email" do
     authorize_with :admin
     inactive_user = users(:inactive)
@@ -660,6 +640,24 @@ class Arvados::V1::UsersControllerTest < ActionController::TestCase
     assert (setup_email.body.to_s.include? "#{Rails.configuration.workbench_address}users/#{created['uuid']}/virtual_machines"), 'Expected virtual machines url in email body'
   end
 
+  test "setup inactive user by changing is_active to true" do
+    authorize_with :admin
+    active_user = users(:active)
+
+    # invoke setup with a repository
+    put :update, {
+          id: active_user['uuid'],
+          user: {
+            is_active: true,
+          }
+        }
+    assert_response :success
+    assert_equal active_user['uuid'], json_response['uuid']
+    updated = User.where(uuid: active_user['uuid']).first
+    assert_equal(true, updated.is_active)
+    assert_equal({read: true}, updated.group_permissions[all_users_group_uuid])
+  end
+
   test "non-admin user can get basic information about readable users" do
     authorize_with :spectator
     get(:index)
index 38ac12267aaf8ec6b894835ae0f5876aff9e04d2..09f29b81c08dfba105a5dfa613cb6fb421caa9bb 100644 (file)
@@ -57,8 +57,15 @@ class UsersTest < ActionDispatch::IntegrationTest
         email: "foo@example.com"
       }
     }, auth(:admin)
+    assert_response 422         # cannot create another user with same UUID
 
-    assert_response :success
+    # invoke setup on the same user
+    post "/arvados/v1/users/setup", {
+      repo_name: repo_name,
+      vm_uuid: virtual_machines(:testvm).uuid,
+      openid_prefix: 'https://www.google.com/accounts/o8/id',
+      uuid: 'zzzzz-tpzed-abcdefghijklmno',
+    }, auth(:admin)
 
     response_items = json_response['items']
 
index 2330e7c528f6a304b05cf318fcc2482e91e62b8d..e3bd753c33ac870194e19ec01ecea727510f6424 100644 (file)
@@ -128,9 +128,8 @@ class NodeTest < ActiveSupport::TestCase
   test "ping two nodes one with no hostname and one with hostname and check hostnames" do
     # ping node with no hostname and expect it set with config format
     node = ping_node(:new_with_no_hostname, {})
-    slot_number = node.slot_number
     refute_nil node.slot_number
-    assert_equal "compute#{slot_number}", node.hostname
+    assert_equal "compute#{node.slot_number}", node.hostname
 
     # ping node with a hostname and expect it to be unchanged
     node2 = ping_node(:new_with_custom_hostname, {})
@@ -191,4 +190,22 @@ class NodeTest < ActiveSupport::TestCase
       assert_equal '10.5.5.5', n1.ip_address
     end
   end
+
+  test 'run out of slots' do
+    Rails.configuration.max_compute_nodes = 3
+    act_as_system_user do
+      Node.destroy_all
+      (1..4).each do |i|
+        n = Node.create!
+        args = { ip: "10.0.0.#{i}", ping_secret: n.info['ping_secret'] }
+        if i <= Rails.configuration.max_compute_nodes
+          n.ping(args)
+        else
+          assert_raises do
+            n.ping(args)
+          end
+        end
+      end
+    end
+  end
 end
index 742deda0c663a0946b8eac12f224b5c072069f25..d61787320f2d4c996a772a19950543d930276702 100644 (file)
@@ -447,7 +447,9 @@ class UserTest < ActiveSupport::TestCase
 
     vm = VirtualMachine.create
 
-    response = User.setup user, openid_prefix, 'foo/testrepo', vm.uuid
+    response = user.setup(openid_prefix: openid_prefix,
+                          repo_name: 'foo/testrepo',
+                          vm_uuid: vm.uuid)
 
     resp_user = find_obj_in_resp response, 'User'
     verify_user resp_user, email
@@ -490,7 +492,9 @@ class UserTest < ActiveSupport::TestCase
 
     verify_link resp_link, 'permission', 'can_login', email, bad_uuid
 
-    response = User.setup user, openid_prefix, 'foo/testrepo', vm.uuid
+    response = user.setup(openid_prefix: openid_prefix,
+                          repo_name: 'foo/testrepo',
+                          vm_uuid: vm.uuid)
 
     resp_user = find_obj_in_resp response, 'User'
     verify_user resp_user, email
@@ -522,7 +526,7 @@ class UserTest < ActiveSupport::TestCase
 
     user = User.create ({uuid: 'zzzzz-tpzed-abcdefghijklmno', email: email})
 
-    response = User.setup user, openid_prefix
+    response = user.setup(openid_prefix: openid_prefix)
 
     resp_user = find_obj_in_resp response, 'User'
     verify_user resp_user, email
@@ -537,7 +541,8 @@ class UserTest < ActiveSupport::TestCase
     verify_link group_perm, 'permission', 'can_read', resp_user[:uuid], nil
 
     # invoke setup again with repo_name
-    response = User.setup user, openid_prefix, 'foo/testrepo'
+    response = user.setup(openid_prefix: openid_prefix,
+                          repo_name: 'foo/testrepo')
     resp_user = find_obj_in_resp response, 'User', nil
     verify_user resp_user, email
     assert_equal user.uuid, resp_user[:uuid], 'expected uuid not found'
@@ -551,7 +556,9 @@ class UserTest < ActiveSupport::TestCase
     # invoke setup again with a vm_uuid
     vm = VirtualMachine.create
 
-    response = User.setup user, openid_prefix, 'foo/testrepo', vm.uuid
+    response = user.setup(openid_prefix: openid_prefix,
+                          repo_name: 'foo/testrepo',
+                          vm_uuid: vm.uuid)
 
     resp_user = find_obj_in_resp response, 'User', nil
     verify_user resp_user, email
index aea93df1dc69970ce00d388aecfafc43e842a634..4a91401573f444598dc80582dabea0b3c9ba7231 100644 (file)
@@ -49,7 +49,7 @@ var ErrCancelled = errors.New("Cancelled")
 // IKeepClient is the minimal Keep API methods used by crunch-run.
 type IKeepClient interface {
        PutHB(hash string, buf []byte) (string, int, error)
-       ManifestFileReader(m manifest.Manifest, filename string) (keepclient.Reader, error)
+       ManifestFileReader(m manifest.Manifest, filename string) (arvados.File, error)
 }
 
 // NewLogWriter is a factory function to create a new log writer.
@@ -676,7 +676,7 @@ func (runner *ContainerRunner) AttachStreams() (err error) {
        runner.CrunchLog.Print("Attaching container streams")
 
        // If stdin mount is provided, attach it to the docker container
-       var stdinRdr keepclient.Reader
+       var stdinRdr arvados.File
        var stdinJson []byte
        if stdinMnt, ok := runner.Container.Mounts["stdin"]; ok {
                if stdinMnt.Kind == "collection" {
index 8cefbedf19200f165eb7d607dec93bbad330b6f5..1577215afa745b5f9d50311cd5828ab15f592b09 100644 (file)
@@ -24,7 +24,6 @@ import (
 
        "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
-       "git.curoverse.com/arvados.git/sdk/go/keepclient"
        "git.curoverse.com/arvados.git/sdk/go/manifest"
 
        dockertypes "github.com/docker/docker/api/types"
@@ -305,10 +304,10 @@ func (client *KeepTestClient) PutHB(hash string, buf []byte) (string, int, error
 
 type FileWrapper struct {
        io.ReadCloser
-       len uint64
+       len int64
 }
 
-func (fw FileWrapper) Len() uint64 {
+func (fw FileWrapper) Size() int64 {
        return fw.len
 }
 
@@ -316,7 +315,7 @@ func (fw FileWrapper) Seek(int64, int) (int64, error) {
        return 0, errors.New("not implemented")
 }
 
-func (client *KeepTestClient) ManifestFileReader(m manifest.Manifest, filename string) (keepclient.Reader, error) {
+func (client *KeepTestClient) ManifestFileReader(m manifest.Manifest, filename string) (arvados.File, error) {
        if filename == hwImageId+".tar" {
                rdr := ioutil.NopCloser(&bytes.Buffer{})
                client.Called = true
@@ -404,7 +403,7 @@ func (KeepErrorTestClient) PutHB(hash string, buf []byte) (string, int, error) {
        return "", 0, errors.New("KeepError")
 }
 
-func (KeepErrorTestClient) ManifestFileReader(m manifest.Manifest, filename string) (keepclient.Reader, error) {
+func (KeepErrorTestClient) ManifestFileReader(m manifest.Manifest, filename string) (arvados.File, error) {
        return nil, errors.New("KeepError")
 }
 
@@ -424,7 +423,7 @@ func (ErrorReader) Close() error {
        return nil
 }
 
-func (ErrorReader) Len() uint64 {
+func (ErrorReader) Size() int64 {
        return 0
 }
 
@@ -432,7 +431,7 @@ func (ErrorReader) Seek(int64, int) (int64, error) {
        return 0, errors.New("ErrorReader")
 }
 
-func (KeepReadErrorTestClient) ManifestFileReader(m manifest.Manifest, filename string) (keepclient.Reader, error) {
+func (KeepReadErrorTestClient) ManifestFileReader(m manifest.Manifest, filename string) (arvados.File, error) {
        return ErrorReader{}, nil
 }
 
index ab7c65310b0abbf6b9ba1c5eb2b8a22142ab8347..886e5910b5b26ea93a9428e441f273fe316330f6 100644 (file)
@@ -1,7 +1,6 @@
 package main
 
 import (
-       "fmt"
        "sync"
        "sync/atomic"
        "time"
@@ -42,7 +41,7 @@ type cachedPDH struct {
 
 type cachedCollection struct {
        expire     time.Time
-       collection map[string]interface{}
+       collection *arvados.Collection
 }
 
 type cachedPermission struct {
@@ -82,7 +81,7 @@ func (c *cache) Stats() cacheStats {
        }
 }
 
-func (c *cache) Get(arv *arvadosclient.ArvadosClient, targetID string, forceReload bool) (map[string]interface{}, error) {
+func (c *cache) Get(arv *arvadosclient.ArvadosClient, targetID string, forceReload bool) (*arvados.Collection, error) {
        c.setupOnce.Do(c.setup)
 
        atomic.AddUint64(&c.stats.Requests, 1)
@@ -103,7 +102,6 @@ func (c *cache) Get(arv *arvadosclient.ArvadosClient, targetID string, forceRelo
        var pdh string
        if arvadosclient.PDHMatch(targetID) {
                pdh = targetID
-       } else if forceReload {
        } else if ent, cached := c.pdhs.Get(targetID); cached {
                ent := ent.(*cachedPDH)
                if ent.expire.Before(time.Now()) {
@@ -114,7 +112,7 @@ func (c *cache) Get(arv *arvadosclient.ArvadosClient, targetID string, forceRelo
                }
        }
 
-       var collection map[string]interface{}
+       var collection *arvados.Collection
        if pdh != "" {
                collection = c.lookupCollection(pdh)
        }
@@ -127,14 +125,12 @@ func (c *cache) Get(arv *arvadosclient.ArvadosClient, targetID string, forceRelo
                // _and_ the current token has permission, we can
                // use our cached manifest.
                atomic.AddUint64(&c.stats.APICalls, 1)
-               var current map[string]interface{}
+               var current arvados.Collection
                err := arv.Get("collections", targetID, selectPDH, &current)
                if err != nil {
                        return nil, err
                }
-               if checkPDH, ok := current["portable_data_hash"].(string); !ok {
-                       return nil, fmt.Errorf("API response for %q had no PDH", targetID)
-               } else if checkPDH == pdh {
+               if current.PortableDataHash == pdh {
                        exp := time.Now().Add(time.Duration(c.TTL))
                        c.permissions.Add(permKey, &cachedPermission{
                                expire: exp,
@@ -150,7 +146,7 @@ func (c *cache) Get(arv *arvadosclient.ArvadosClient, targetID string, forceRelo
                        // PDH changed, but now we know we have
                        // permission -- and maybe we already have the
                        // new PDH in the cache.
-                       if coll := c.lookupCollection(checkPDH); coll != nil {
+                       if coll := c.lookupCollection(current.PortableDataHash); coll != nil {
                                return coll, nil
                        }
                }
@@ -162,23 +158,19 @@ func (c *cache) Get(arv *arvadosclient.ArvadosClient, targetID string, forceRelo
        if err != nil {
                return nil, err
        }
-       pdh, ok := collection["portable_data_hash"].(string)
-       if !ok {
-               return nil, fmt.Errorf("API response for %q had no PDH", targetID)
-       }
        exp := time.Now().Add(time.Duration(c.TTL))
        c.permissions.Add(permKey, &cachedPermission{
                expire: exp,
        })
        c.pdhs.Add(targetID, &cachedPDH{
                expire: exp,
-               pdh:    pdh,
+               pdh:    collection.PortableDataHash,
        })
-       c.collections.Add(pdh, &cachedCollection{
+       c.collections.Add(collection.PortableDataHash, &cachedCollection{
                expire:     exp,
                collection: collection,
        })
-       if int64(len(collection["manifest_text"].(string))) > c.MaxCollectionBytes/int64(c.MaxCollectionEntries) {
+       if int64(len(collection.ManifestText)) > c.MaxCollectionBytes/int64(c.MaxCollectionEntries) {
                go c.pruneCollections()
        }
        return collection, nil
@@ -203,7 +195,7 @@ func (c *cache) pruneCollections() {
                        continue
                }
                ent := v.(*cachedCollection)
-               n := len(ent.collection["manifest_text"].(string))
+               n := len(ent.collection.ManifestText)
                size += int64(n)
                entsize[i] = n
                expired[i] = ent.expire.Before(now)
@@ -236,12 +228,12 @@ func (c *cache) collectionBytes() uint64 {
                if !ok {
                        continue
                }
-               size += uint64(len(v.(*cachedCollection).collection["manifest_text"].(string)))
+               size += uint64(len(v.(*cachedCollection).collection.ManifestText))
        }
        return size
 }
 
-func (c *cache) lookupCollection(pdh string) map[string]interface{} {
+func (c *cache) lookupCollection(pdh string) *arvados.Collection {
        if pdh == "" {
                return nil
        } else if ent, cached := c.collections.Get(pdh); !cached {
index f8aa2b1c60e095198719a7028de3a9cbde7fe12a..7d372085906a0551f4c0122fecdf275944a57f77 100644 (file)
@@ -20,8 +20,8 @@ func (s *UnitSuite) TestCache(c *check.C) {
                coll, err := cache.Get(arv, arvadostest.FooCollection, false)
                c.Check(err, check.Equals, nil)
                c.Assert(coll, check.NotNil)
-               c.Check(coll["portable_data_hash"], check.Equals, arvadostest.FooPdh)
-               c.Check(coll["manifest_text"].(string)[:2], check.Equals, ". ")
+               c.Check(coll.PortableDataHash, check.Equals, arvadostest.FooPdh)
+               c.Check(coll.ManifestText[:2], check.Equals, ". ")
        }
        c.Check(cache.Stats().Requests, check.Equals, uint64(5))
        c.Check(cache.Stats().CollectionHits, check.Equals, uint64(4))
@@ -38,8 +38,8 @@ func (s *UnitSuite) TestCache(c *check.C) {
                coll, err := cache.Get(arv, arvadostest.FooPdh, false)
                c.Check(err, check.Equals, nil)
                c.Assert(coll, check.NotNil)
-               c.Check(coll["portable_data_hash"], check.Equals, arvadostest.FooPdh)
-               c.Check(coll["manifest_text"].(string)[:2], check.Equals, ". ")
+               c.Check(coll.PortableDataHash, check.Equals, arvadostest.FooPdh)
+               c.Check(coll.ManifestText[:2], check.Equals, ". ")
        }
        c.Check(cache.Stats().Requests, check.Equals, uint64(5+2))
        c.Check(cache.Stats().CollectionHits, check.Equals, uint64(4+2))
@@ -97,8 +97,8 @@ func (s *UnitSuite) TestCacheForceReloadByUUID(c *check.C) {
        }
 
        c.Check(cache.Stats().Requests, check.Equals, uint64(4))
-       c.Check(cache.Stats().CollectionHits, check.Equals, uint64(1))
+       c.Check(cache.Stats().CollectionHits, check.Equals, uint64(3))
        c.Check(cache.Stats().PermissionHits, check.Equals, uint64(1))
-       c.Check(cache.Stats().PDHHits, check.Equals, uint64(1))
+       c.Check(cache.Stats().PDHHits, check.Equals, uint64(3))
        c.Check(cache.Stats().APICalls, check.Equals, uint64(3))
 }
index 42c37b8eebf947bea060ef2ace9a68b1fcca67ad..4b17e8648efe60cd7f298c5e926b91f2efed356a 100644 (file)
@@ -4,16 +4,17 @@ import (
        "encoding/json"
        "fmt"
        "html"
+       "html/template"
        "io"
        "net/http"
        "net/url"
        "os"
-       "path"
+       "sort"
        "strconv"
        "strings"
        "sync"
-       "time"
 
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
        "git.curoverse.com/arvados.git/sdk/go/auth"
        "git.curoverse.com/arvados.git/sdk/go/httpserver"
@@ -142,8 +143,8 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 
        pathParts := strings.Split(r.URL.Path[1:], "/")
 
+       var stripParts int
        var targetID string
-       var targetPath []string
        var tokens []string
        var reqTokens []string
        var pathToken bool
@@ -160,26 +161,25 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        if targetID = parseCollectionIDFromDNSName(r.Host); targetID != "" {
                // http://ID.collections.example/PATH...
                credentialsOK = true
-               targetPath = pathParts
        } else if r.URL.Path == "/status.json" {
                h.serveStatus(w, r)
                return
-       } else if len(pathParts) >= 2 && strings.HasPrefix(pathParts[0], "c=") {
-               // /c=ID/PATH...
+       } else if len(pathParts) >= 1 && strings.HasPrefix(pathParts[0], "c=") {
+               // /c=ID[/PATH...]
                targetID = parseCollectionIDFromURL(pathParts[0][2:])
-               targetPath = pathParts[1:]
-       } else if len(pathParts) >= 3 && pathParts[0] == "collections" {
-               if len(pathParts) >= 5 && pathParts[1] == "download" {
+               stripParts = 1
+       } else if len(pathParts) >= 2 && pathParts[0] == "collections" {
+               if len(pathParts) >= 4 && pathParts[1] == "download" {
                        // /collections/download/ID/TOKEN/PATH...
                        targetID = parseCollectionIDFromURL(pathParts[2])
                        tokens = []string{pathParts[3]}
-                       targetPath = pathParts[4:]
+                       stripParts = 4
                        pathToken = true
                } else {
                        // /collections/ID/PATH...
                        targetID = parseCollectionIDFromURL(pathParts[1])
                        tokens = h.Config.AnonymousTokens
-                       targetPath = pathParts[2:]
+                       stripParts = 2
                }
        }
 
@@ -210,56 +210,12 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                // token in an HttpOnly cookie, and redirect to the
                // same URL with the query param redacted and method =
                // GET.
-
-               if !credentialsOK {
-                       // It is not safe to copy the provided token
-                       // into a cookie unless the current vhost
-                       // (origin) serves only a single collection or
-                       // we are in TrustAllContent mode.
-                       statusCode = http.StatusBadRequest
-                       return
-               }
-
-               // The HttpOnly flag is necessary to prevent
-               // JavaScript code (included in, or loaded by, a page
-               // in the collection being served) from employing the
-               // user's token beyond reading other files in the same
-               // domain, i.e., same collection.
-               //
-               // The 303 redirect is necessary in the case of a GET
-               // request to avoid exposing the token in the Location
-               // bar, and in the case of a POST request to avoid
-               // raising warnings when the user refreshes the
-               // resulting page.
-
-               http.SetCookie(w, &http.Cookie{
-                       Name:     "arvados_api_token",
-                       Value:    auth.EncodeTokenCookie([]byte(formToken)),
-                       Path:     "/",
-                       HttpOnly: true,
-               })
-
-               // Propagate query parameters (except api_token) from
-               // the original request.
-               redirQuery := r.URL.Query()
-               redirQuery.Del("api_token")
-
-               redir := (&url.URL{
-                       Host:     r.Host,
-                       Path:     r.URL.Path,
-                       RawQuery: redirQuery.Encode(),
-               }).String()
-
-               w.Header().Add("Location", redir)
-               statusCode, statusText = http.StatusSeeOther, redir
-               w.WriteHeader(statusCode)
-               io.WriteString(w, `<A href="`)
-               io.WriteString(w, html.EscapeString(redir))
-               io.WriteString(w, `">Continue</A>`)
+               h.seeOtherWithCookie(w, r, "", credentialsOK)
                return
        }
 
-       if tokens == nil && strings.HasPrefix(targetPath[0], "t=") {
+       targetPath := pathParts[stripParts:]
+       if tokens == nil && len(targetPath) > 0 && strings.HasPrefix(targetPath[0], "t=") {
                // http://ID.example/t=TOKEN/PATH...
                // /c=ID/t=TOKEN/PATH...
                //
@@ -269,6 +225,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                tokens = []string{targetPath[0][2:]}
                pathToken = true
                targetPath = targetPath[1:]
+               stripParts++
        }
 
        if tokens == nil {
@@ -286,6 +243,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                // //collections.example/t=foo/ won't work because
                // t=foo will be interpreted as a token "foo".
                targetPath = targetPath[1:]
+               stripParts++
        }
 
        forceReload := false
@@ -293,15 +251,13 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                forceReload = true
        }
 
-       var collection map[string]interface{}
+       var collection *arvados.Collection
        tokenResult := make(map[string]int)
-       found := false
        for _, arv.ApiToken = range tokens {
                var err error
                collection, err = h.Config.Cache.Get(arv, targetID, forceReload)
                if err == nil {
                        // Success
-                       found = true
                        break
                }
                if srvErr, ok := err.(arvadosclient.APIServerError); ok {
@@ -317,7 +273,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                statusCode, statusText = http.StatusInternalServerError, err.Error()
                return
        }
-       if !found {
+       if collection == nil {
                if pathToken || !credentialsOK {
                        // Either the URL is a "secret sharing link"
                        // that didn't work out (and asking the client
@@ -349,31 +305,175 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                return
        }
 
-       filename := strings.Join(targetPath, "/")
        kc, err := keepclient.MakeKeepClient(arv)
        if err != nil {
                statusCode, statusText = http.StatusInternalServerError, err.Error()
                return
        }
-       rdr, err := kc.CollectionFileReader(collection, filename)
-       if os.IsNotExist(err) {
+
+       basename := targetPath[len(targetPath)-1]
+       applyContentDispositionHdr(w, r, basename, attachment)
+
+       fs := collection.FileSystem(&arvados.Client{
+               APIHost:   arv.ApiServer,
+               AuthToken: arv.ApiToken,
+               Insecure:  arv.ApiInsecure,
+       }, kc)
+       openPath := "/" + strings.Join(targetPath, "/")
+       if f, err := fs.Open(openPath); os.IsNotExist(err) {
+               // Requested non-existent path
                statusCode = http.StatusNotFound
-               return
        } else if err != nil {
-               statusCode, statusText = http.StatusBadGateway, err.Error()
-               return
+               // Some other (unexpected) error
+               statusCode, statusText = http.StatusInternalServerError, err.Error()
+       } else if stat, err := f.Stat(); err != nil {
+               // Can't get Size/IsDir (shouldn't happen with a collectionFS!)
+               statusCode, statusText = http.StatusInternalServerError, err.Error()
+       } else if stat.IsDir() && !strings.HasSuffix(r.URL.Path, "/") {
+               // If client requests ".../dirname", redirect to
+               // ".../dirname/". This way, relative links in the
+               // listing for "dirname" can always be "fnm", never
+               // "dirname/fnm".
+               h.seeOtherWithCookie(w, r, basename+"/", credentialsOK)
+       } else if stat.IsDir() {
+               h.serveDirectory(w, r, collection.Name, fs, openPath, stripParts)
+       } else {
+               http.ServeContent(w, r, basename, stat.ModTime(), f)
+               if r.Header.Get("Range") == "" && int64(w.WroteBodyBytes()) != stat.Size() {
+                       // If we wrote fewer bytes than expected, it's
+                       // too late to change the real response code
+                       // or send an error message to the client, but
+                       // at least we can try to put some useful
+                       // debugging info in the logs.
+                       n, err := f.Read(make([]byte, 1024))
+                       statusCode, statusText = http.StatusInternalServerError, fmt.Sprintf("f.Size()==%d but only wrote %d bytes; read(1024) returns %d, %s", stat.Size(), w.WroteBodyBytes(), n, err)
+
+               }
        }
-       defer rdr.Close()
+}
 
-       basename := path.Base(filename)
-       applyContentDispositionHdr(w, r, basename, attachment)
+var dirListingTemplate = `<!DOCTYPE HTML>
+<HTML><HEAD>
+  <META name="robots" content="NOINDEX">
+  <TITLE>{{ .CollectionName }}</TITLE>
+  <STYLE type="text/css">
+    body {
+      margin: 1.5em;
+    }
+    pre {
+      background-color: #D9EDF7;
+      border-radius: .25em;
+      padding: .75em;
+      overflow: auto;
+    }
+    .footer p {
+      font-size: 82%;
+    }
+    ul {
+      padding: 0;
+    }
+    ul li {
+      font-family: monospace;
+      list-style: none;
+    }
+  </STYLE>
+</HEAD>
+<BODY>
+<H1>{{ .CollectionName }}</H1>
+
+<P>This collection of data files is being shared with you through
+Arvados.  You can download individual files listed below.  To download
+the entire collection with wget, try:</P>
+
+<PRE>$ wget --mirror --no-parent --no-host --cut-dirs={{ .StripParts }} https://{{ .Request.Host }}{{ .Request.URL }}</PRE>
+
+<H2>File Listing</H2>
+
+{{if .Files}}
+<UL>
+{{range .Files}}  <LI>{{.Size | printf "%15d  " | nbsp}}<A href="{{.Name}}">{{.Name}}</A></LI>{{end}}
+</UL>
+{{else}}
+<P>(No files; this collection is empty.)</P>
+{{end}}
+
+<HR noshade>
+<DIV class="footer">
+  <P>
+    About Arvados:
+    Arvados is a free and open source software bioinformatics platform.
+    To learn more, visit arvados.org.
+    Arvados is not responsible for the files listed on this page.
+  </P>
+</DIV>
+
+</BODY>
+`
+
+type fileListEnt struct {
+       Name string
+       Size int64
+}
+
+func (h *handler) serveDirectory(w http.ResponseWriter, r *http.Request, collectionName string, fs http.FileSystem, base string, stripParts int) {
+       var files []fileListEnt
+       var walk func(string) error
+       if !strings.HasSuffix(base, "/") {
+               base = base + "/"
+       }
+       walk = func(path string) error {
+               dirname := base + path
+               if dirname != "/" {
+                       dirname = strings.TrimSuffix(dirname, "/")
+               }
+               d, err := fs.Open(dirname)
+               if err != nil {
+                       return err
+               }
+               ents, err := d.Readdir(-1)
+               if err != nil {
+                       return err
+               }
+               for _, ent := range ents {
+                       if ent.IsDir() {
+                               err = walk(path + ent.Name() + "/")
+                               if err != nil {
+                                       return err
+                               }
+                       } else {
+                               files = append(files, fileListEnt{
+                                       Name: path + ent.Name(),
+                                       Size: ent.Size(),
+                               })
+                       }
+               }
+               return nil
+       }
+       if err := walk(""); err != nil {
+               http.Error(w, err.Error(), http.StatusInternalServerError)
+               return
+       }
 
-       modstr, _ := collection["modified_at"].(string)
-       modtime, err := time.Parse(time.RFC3339Nano, modstr)
+       funcs := template.FuncMap{
+               "nbsp": func(s string) template.HTML {
+                       return template.HTML(strings.Replace(s, " ", "&nbsp;", -1))
+               },
+       }
+       tmpl, err := template.New("dir").Funcs(funcs).Parse(dirListingTemplate)
        if err != nil {
-               modtime = time.Now()
+               http.Error(w, err.Error(), http.StatusInternalServerError)
+               return
        }
-       http.ServeContent(w, r, basename, modtime, rdr)
+       sort.Slice(files, func(i, j int) bool {
+               return files[i].Name < files[j].Name
+       })
+       w.WriteHeader(http.StatusOK)
+       tmpl.Execute(w, map[string]interface{}{
+               "CollectionName": collectionName,
+               "Files":          files,
+               "Request":        r,
+               "StripParts":     stripParts,
+       })
 }
 
 func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename string, isAttachment bool) {
@@ -393,3 +493,61 @@ func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename
                w.Header().Set("Content-Disposition", disposition)
        }
 }
+
+func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, location string, credentialsOK bool) {
+       if !credentialsOK {
+               // It is not safe to copy the provided token
+               // into a cookie unless the current vhost
+               // (origin) serves only a single collection or
+               // we are in TrustAllContent mode.
+               w.WriteHeader(http.StatusBadRequest)
+               return
+       }
+
+       if formToken := r.FormValue("api_token"); formToken != "" {
+               // The HttpOnly flag is necessary to prevent
+               // JavaScript code (included in, or loaded by, a page
+               // in the collection being served) from employing the
+               // user's token beyond reading other files in the same
+               // domain, i.e., same collection.
+               //
+               // The 303 redirect is necessary in the case of a GET
+               // request to avoid exposing the token in the Location
+               // bar, and in the case of a POST request to avoid
+               // raising warnings when the user refreshes the
+               // resulting page.
+
+               http.SetCookie(w, &http.Cookie{
+                       Name:     "arvados_api_token",
+                       Value:    auth.EncodeTokenCookie([]byte(formToken)),
+                       Path:     "/",
+                       HttpOnly: true,
+               })
+       }
+
+       // Propagate query parameters (except api_token) from
+       // the original request.
+       redirQuery := r.URL.Query()
+       redirQuery.Del("api_token")
+
+       u := r.URL
+       if location != "" {
+               newu, err := u.Parse(location)
+               if err != nil {
+                       w.WriteHeader(http.StatusInternalServerError)
+                       return
+               }
+               u = newu
+       }
+       redir := (&url.URL{
+               Host:     r.Host,
+               Path:     u.Path,
+               RawQuery: redirQuery.Encode(),
+       }).String()
+
+       w.Header().Add("Location", redir)
+       w.WriteHeader(http.StatusSeeOther)
+       io.WriteString(w, `<A href="`)
+       io.WriteString(w, html.EscapeString(redir))
+       io.WriteString(w, `">Continue</A>`)
+}
index df0346ba315f420ce5aae5dd2d3a59c06e4e87fb..d3946590339bbc071476aaadb957dbd1a607b09c 100644 (file)
@@ -1,6 +1,7 @@
 package main
 
 import (
+       "fmt"
        "html"
        "io/ioutil"
        "net/http"
@@ -479,3 +480,108 @@ func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, ho
        c.Check(resp.Header().Get("Location"), check.Equals, "")
        return resp
 }
+
+func (s *IntegrationSuite) TestDirectoryListing(c *check.C) {
+       s.testServer.Config.AttachmentOnlyHost = "download.example.com"
+       authHeader := http.Header{
+               "Authorization": {"OAuth2 " + arvadostest.ActiveToken},
+       }
+       for _, trial := range []struct {
+               uri     string
+               header  http.Header
+               expect  []string
+               cutDirs int
+       }{
+               {
+                       uri:     strings.Replace(arvadostest.FooAndBarFilesInDirPDH, "+", "-", -1) + ".example.com/",
+                       header:  authHeader,
+                       expect:  []string{"dir1/foo", "dir1/bar"},
+                       cutDirs: 0,
+               },
+               {
+                       uri:     strings.Replace(arvadostest.FooAndBarFilesInDirPDH, "+", "-", -1) + ".example.com/dir1/",
+                       header:  authHeader,
+                       expect:  []string{"foo", "bar"},
+                       cutDirs: 0,
+               },
+               {
+                       uri:     "download.example.com/collections/" + arvadostest.FooAndBarFilesInDirUUID + "/",
+                       header:  authHeader,
+                       expect:  []string{"dir1/foo", "dir1/bar"},
+                       cutDirs: 2,
+               },
+               {
+                       uri:     "collections.example.com/collections/download/" + arvadostest.FooAndBarFilesInDirUUID + "/" + arvadostest.ActiveToken + "/",
+                       header:  nil,
+                       expect:  []string{"dir1/foo", "dir1/bar"},
+                       cutDirs: 4,
+               },
+               {
+                       uri:     "collections.example.com/c=" + arvadostest.FooAndBarFilesInDirUUID + "/t=" + arvadostest.ActiveToken + "/",
+                       header:  nil,
+                       expect:  []string{"dir1/foo", "dir1/bar"},
+                       cutDirs: 2,
+               },
+               {
+                       uri:     "download.example.com/c=" + arvadostest.FooAndBarFilesInDirUUID + "/dir1/",
+                       header:  authHeader,
+                       expect:  []string{"foo", "bar"},
+                       cutDirs: 1,
+               },
+               {
+                       uri:     "download.example.com/c=" + arvadostest.FooAndBarFilesInDirUUID + "/_/dir1/",
+                       header:  authHeader,
+                       expect:  []string{"foo", "bar"},
+                       cutDirs: 2,
+               },
+               {
+                       uri:     arvadostest.FooAndBarFilesInDirUUID + ".example.com/dir1?api_token=" + arvadostest.ActiveToken,
+                       header:  authHeader,
+                       expect:  []string{"foo", "bar"},
+                       cutDirs: 0,
+               },
+               {
+                       uri:    "collections.example.com/c=" + arvadostest.FooAndBarFilesInDirUUID + "/theperthcountyconspiracydoesnotexist/",
+                       header: authHeader,
+                       expect: nil,
+               },
+       } {
+               c.Logf("%q => %q", trial.uri, trial.expect)
+               resp := httptest.NewRecorder()
+               u := mustParseURL("//" + trial.uri)
+               req := &http.Request{
+                       Method:     "GET",
+                       Host:       u.Host,
+                       URL:        u,
+                       RequestURI: u.RequestURI(),
+                       Header:     trial.header,
+               }
+               s.testServer.Handler.ServeHTTP(resp, req)
+               var cookies []*http.Cookie
+               for resp.Code == http.StatusSeeOther {
+                       u, _ := req.URL.Parse(resp.Header().Get("Location"))
+                       req = &http.Request{
+                               Method:     "GET",
+                               Host:       u.Host,
+                               URL:        u,
+                               RequestURI: u.RequestURI(),
+                               Header:     http.Header{},
+                       }
+                       cookies = append(cookies, (&http.Response{Header: resp.Header()}).Cookies()...)
+                       for _, c := range cookies {
+                               req.AddCookie(c)
+                       }
+                       resp = httptest.NewRecorder()
+                       s.testServer.Handler.ServeHTTP(resp, req)
+               }
+               if trial.expect == nil {
+                       c.Check(resp.Code, check.Equals, http.StatusNotFound)
+               } else {
+                       c.Check(resp.Code, check.Equals, http.StatusOK)
+                       for _, e := range trial.expect {
+                               c.Check(resp.Body.String(), check.Matches, `(?ms).*href="`+e+`".*`)
+                       }
+                       c.Check(resp.Body.String(), check.Matches, `(?ms).*--cut-dirs=`+fmt.Sprintf("%d", trial.cutDirs)+` .*`)
+               }
+       }
+}
index 52fe459ec43ff13c422c1679017de68246a36d1d..500561d69c74a41d18f923c982a7b8397030bf67 100644 (file)
@@ -77,7 +77,9 @@ func (s *IntegrationSuite) Test404(c *check.C) {
        } {
                hdr, body, _ := s.runCurl(c, arvadostest.ActiveToken, "collections.example.com", uri)
                c.Check(hdr, check.Matches, "(?s)HTTP/1.1 404 Not Found\r\n.*")
-               c.Check(body, check.Equals, "")
+               if len(body) > 0 {
+                       c.Check(body, check.Equals, "404 page not found\n")
+               }
        }
 }
 
index 8deabbd50a6163da537193d0df39ac720f1d04d0..d6b877c27cdef137b469b7a06027712c3ac8986c 100644 (file)
@@ -67,8 +67,25 @@ class ComputeNodeDriver(BaseComputeNodeDriver):
     create_cloud_name = staticmethod(arvados_node_fqdn)
 
     def arvados_create_kwargs(self, size, arvados_node):
-        return {'name': self.create_cloud_name(arvados_node),
+        kw = {'name': self.create_cloud_name(arvados_node),
                 'ex_userdata': self._make_ping_url(arvados_node)}
+        # libcloud/ec2 disk sizes are in GB, Arvados/SLURM "scratch" value is in MB
+        scratch = int(size.scratch / 1000) + 1
+        if scratch > size.disk:
+            volsize = scratch - size.disk
+            if volsize > 16384:
+                # Must be 1-16384 for General Purpose SSD (gp2) devices
+                # https://docs.aws.amazon.com/AWSEC2/latest/APIReference/API_EbsBlockDevice.html
+                self._logger.warning("Requested EBS volume size %d is too large, capping size request to 16384 GB", volsize)
+                volsize = 16384
+            kw["ex_blockdevicemappings"] = [{
+                "DeviceName": "/dev/xvdt",
+                "Ebs": {
+                    "DeleteOnTermination": True,
+                    "VolumeSize": volsize,
+                    "VolumeType": "gp2"
+                }}]
+        return kw
 
     def post_create_node(self, cloud_node):
         self.real.ex_create_tags(cloud_node, self.tags)
index 7e63c782ede1fecee931d088505aed549a21c9df..5fc56739b9a2304ebbb3c10533afd78d259eb442 100644 (file)
@@ -387,7 +387,7 @@ class NodeManagerDaemonActor(actor_class):
             arvados_client=self._new_arvados(),
             arvados_node=arvados_node,
             cloud_client=self._new_cloud(),
-            cloud_size=cloud_size).proxy()
+            cloud_size=self.server_calculator.find_size(cloud_size.id)).proxy()
         self.booting[new_setup.actor_ref.actor_urn] = new_setup
         self.sizes_booting[new_setup.actor_ref.actor_urn] = cloud_size
 
index 8e7cf2ffcca77d78d176cdaa1091536b6ac6ddc6..1e150028173916dde0535ae513d7be60065be61d 100644 (file)
@@ -1,6 +1,9 @@
 import re
 import urllib
 import ssl
+import time
+
+from arvnodeman.computenode import ARVADOS_TIMEFMT
 
 from libcloud.compute.base import NodeSize, Node, NodeDriver, NodeState
 from libcloud.common.exceptions import BaseHTTPError
@@ -29,12 +32,16 @@ class FakeDriver(NodeDriver):
                     ex_resource_group=None,
                     ex_user_name=None,
                     ex_tags=None,
-                    ex_network=None):
+                    ex_network=None,
+                    ex_userdata=None):
         global all_nodes, create_calls
         create_calls += 1
         n = Node(name, name, NodeState.RUNNING, [], [], self, size=size, extra={"tags": ex_tags})
         all_nodes.append(n)
-        ping_url = re.search(r"echo '(.*)' > /var/tmp/arv-node-data/arv-ping-url", ex_customdata).groups(1)[0] + "&instance_id=" + name
+        if ex_customdata:
+            ping_url = re.search(r"echo '(.*)' > /var/tmp/arv-node-data/arv-ping-url", ex_customdata).groups(1)[0] + "&instance_id=" + name
+        if ex_userdata:
+            ping_url = ex_userdata
         ctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
         ctx.verify_mode = ssl.CERT_NONE
         f = urllib.urlopen(ping_url, "", context=ctx)
@@ -125,3 +132,24 @@ class RetryDriver(FakeDriver):
                     ex_user_name=ex_user_name,
                     ex_tags=ex_tags,
                     ex_network=ex_network)
+
+class FakeAwsDriver(FakeDriver):
+
+    def create_node(self, name=None,
+                    size=None,
+                    image=None,
+                    auth=None,
+                    ex_userdata=None,
+                    ex_blockdevicemappings=None):
+        n = super(FakeAwsDriver, self).create_node(name=name,
+                                                      size=size,
+                                                      image=image,
+                                                      auth=auth,
+                                                      ex_userdata=ex_userdata)
+        n.extra = {"launch_time": time.strftime(ARVADOS_TIMEFMT, time.gmtime())[:-1]}
+        return n
+
+    def list_sizes(self, **kwargs):
+        return [NodeSize("m3.xlarge", "Extra Large Instance", 3500, 80, 0, 0, self),
+                NodeSize("m4.xlarge", "Extra Large Instance", 3500, 0, 0, 0, self),
+                NodeSize("m4.2xlarge", "Double Extra Large Instance", 7000, 0, 0, 0, self)]
index 5eb923eb93079cc28bc0d1836c9f4b6dea6635a2..93431914b5f2ae6dc134b342fd1cb1d9902dbd7e 100644 (file)
@@ -29,7 +29,7 @@ setup(name='arvados-node-manager',
           ('share/doc/arvados-node-manager', ['agpl-3.0.txt', 'README.rst']),
       ],
       install_requires=[
-          'apache-libcloud>=0.16',
+          'apache-libcloud>=0.20',
           'arvados-python-client>=0.1.20150206225333',
           'future',
           'pykka',
@@ -37,14 +37,14 @@ setup(name='arvados-node-manager',
           'setuptools'
       ],
       dependency_links=[
-          "https://github.com/curoverse/libcloud/archive/apache-libcloud-0.18.1.dev4.zip"
+          "https://github.com/curoverse/libcloud/archive/apache-libcloud-0.20.2.dev3.zip"
       ],
       test_suite='tests',
       tests_require=[
           'requests',
           'pbr<1.7.0',
           'mock>=1.0',
-          'apache-libcloud==0.18.1.dev4',
+          'apache-libcloud==0.20.2.dev3',
       ],
       zip_safe=False,
       cmdclass={'egg_info': tagger},
index 14df3602313f8e1c249811395d0809dc57f961ee..8da55a1f474518f26d7b668d189d1a3757eadcf6 100644 (file)
@@ -96,3 +96,53 @@ class EC2ComputeNodeDriverTestCase(testutil.DriverTestMixin, unittest.TestCase):
         node = testutil.cloud_node_mock()
         node.name = name
         self.assertEqual(name, ec2.ComputeNodeDriver.node_fqdn(node))
+
+    def test_create_ebs_volume(self):
+        arv_node = testutil.arvados_node_mock()
+        driver = self.new_driver()
+        # libcloud/ec2 "disk" sizes are in GB, Arvados/SLURM "scratch" value is in MB
+        size = testutil.MockSize(1)
+        size.disk=5
+        size.scratch=20000
+        driver.create_node(size, arv_node)
+        create_method = self.driver_mock().create_node
+        self.assertTrue(create_method.called)
+        self.assertEqual([{
+            "DeviceName": "/dev/xvdt",
+            "Ebs": {
+                "DeleteOnTermination": True,
+                "VolumeSize": 16,
+                "VolumeType": "gp2"
+            }}],
+                         create_method.call_args[1].get('ex_blockdevicemappings'))
+
+    def test_ebs_volume_not_needed(self):
+        arv_node = testutil.arvados_node_mock()
+        driver = self.new_driver()
+        # libcloud/ec2 "disk" sizes are in GB, Arvados/SLURM "scratch" value is in MB
+        size = testutil.MockSize(1)
+        size.disk=80
+        size.scratch=20000
+        driver.create_node(size, arv_node)
+        create_method = self.driver_mock().create_node
+        self.assertTrue(create_method.called)
+        self.assertIsNone(create_method.call_args[1].get('ex_blockdevicemappings'))
+
+    def test_ebs_volume_too_big(self):
+        arv_node = testutil.arvados_node_mock()
+        driver = self.new_driver()
+        # libcloud/ec2 "disk" sizes are in GB, Arvados/SLURM "scratch" value is in MB
+        size = testutil.MockSize(1)
+        size.disk=80
+        size.scratch=20000000
+        driver.create_node(size, arv_node)
+        create_method = self.driver_mock().create_node
+        self.assertTrue(create_method.called)
+        self.assertEqual([{
+            "DeviceName": "/dev/xvdt",
+            "Ebs": {
+                "DeleteOnTermination": True,
+                "VolumeSize": 16384,
+                "VolumeType": "gp2"
+            }}],
+                         create_method.call_args[1].get('ex_blockdevicemappings'))
index 41f4ed135563e7868c37831a91a648f09053c7f0..1ee76900bf7989a5fa66bbcd302261c2dcdca264 100644 (file)
@@ -79,7 +79,8 @@ class MockSize(object):
         self.id = 'z{}.test'.format(factor)
         self.name = self.id
         self.ram = 128 * factor
-        self.disk = 100 * factor
+        self.disk = factor   # GB
+        self.scratch = 1000 * factor # MB
         self.bandwidth = 16 * factor
         self.price = float(factor)
         self.extra = {}
index 395129d21b76e6e7aea027e79feb402afd09d2fc..f7ef693df83cbc6ca15c43c07c34aada6a1b15dc 100755 (executable)
@@ -124,7 +124,7 @@ run() {
         fi
     fi
 
-    if test ! -z "$TAG"
+    if test -n "$TAG"
     then
         if test $(echo $TAG | cut -c1-1) != '-' ; then
            TAG=":$TAG"
index 424bbf17dd4f437f7b722a2ab443d1a0202aeda2..eff1e7815e3bf6ce16070be453aeab4e6449fbef 100644 (file)
@@ -2,8 +2,7 @@ FROM debian:8
 
 ENV DEBIAN_FRONTEND noninteractive
 
-RUN apt-get clean && \
-    apt-get update && \
+RUN apt-get update && \
     apt-get -yq --no-install-recommends -o Acquire::Retries=6 install \
     postgresql-9.4 git build-essential runit curl libpq-dev \
     libcurl4-openssl-dev libssl-dev zlib1g-dev libpcre3-dev \
@@ -18,7 +17,7 @@ RUN apt-get clean && \
     libgnutls28-dev python3-dev && \
     apt-get clean
 
-ENV GOVERSION 1.7.5
+ENV GOVERSION 1.8.3
 
 # Install golang binary
 RUN curl -f http://storage.googleapis.com/golang/go${GOVERSION}.linux-amd64.tar.gz | \