8784: Add keep-web directory listings via (*arvados.Collection)FileSystem().
author Tom Clegg <tom@curoverse.com>
Wed, 7 Jun 2017 21:36:08 +0000 (17:36 -0400)
committer Tom Clegg <tom@curoverse.com>
Tue, 13 Jun 2017 14:48:17 +0000 (10:48 -0400)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curoverse.com>

sdk/go/arvados/collection_fs.go [new file with mode: 0644]
sdk/go/arvados/collection_fs_test.go [new file with mode: 0644]
sdk/go/arvadostest/fixtures.go
sdk/go/keepclient/collectionreader.go
services/crunch-run/crunchrun.go
services/keep-web/handler.go
services/keep-web/handler_test.go
services/keep-web/server_test.go

diff --git a/sdk/go/arvados/collection_fs.go b/sdk/go/arvados/collection_fs.go
new file mode 100644 (file)
index 0000000..01d5b65
--- /dev/null
@@ -0,0 +1,235 @@
+package arvados
+
+import (
+       "io"
+       "net/http"
+       "os"
+       "path"
+       "strings"
+       "time"
+
+       "git.curoverse.com/arvados.git/sdk/go/manifest"
+)
+
+// File is a readable, seekable, closable handle on the content of a
+// single file, as returned by a keepClient's ManifestFileReader.
+type File interface {
+       io.Reader
+       io.Closer
+       io.Seeker
+       // Size returns the size of the file.
+       Size() int64
+}
+
+// keepClient is the one operation this file needs from a Keep client:
+// given a manifest and a file name, return a File for reading that
+// file's content.
+type keepClient interface {
+       ManifestFileReader(manifest.Manifest, string) (File, error)
+}
+
+// collectionFile is the handle for a regular file in a collection.
+// Read, Seek and Close are provided by the embedded File; Size,
+// Readdir and Stat are answered from the fields stored here.
+type collectionFile struct {
+       File
+       collection *Collection
+       name       string
+       size       int64
+}
+
+// Size returns the stored size. It takes precedence over the Size
+// method of the embedded File.
+func (f *collectionFile) Size() int64 {
+       return f.size
+}
+
+// Readdir always reports io.EOF with no entries: a regular file has
+// nothing to list.
+func (f *collectionFile) Readdir(count int) ([]os.FileInfo, error) {
+       var none []os.FileInfo
+       return none, io.EOF
+}
+
+// Stat describes the file as a non-directory entry carrying the
+// stored collection, name and size.
+func (f *collectionFile) Stat() (os.FileInfo, error) {
+       var info collectionDirent
+       info.collection = f.collection
+       info.name = f.name
+       info.size = f.size
+       info.isDir = false
+       return info, nil
+}
+
+// collectionDir is the handle for a directory in a collection. It
+// carries the directory's path (stream) and the entries that have not
+// yet been handed out by Readdir.
+type collectionDir struct {
+       collection *Collection
+       stream     string
+       dirents    []os.FileInfo
+}
+
+// Readdir implements http.File. With count <= 0 it returns every
+// remaining entry and a nil error, even when none remain. With
+// count > 0 it returns at most count entries, and io.EOF once the
+// entries are exhausted. Entries are consumed: a later call continues
+// where the previous one stopped.
+func (cd *collectionDir) Readdir(count int) ([]os.FileInfo, error) {
+       ret := cd.dirents
+       if count <= 0 {
+               cd.dirents = nil
+               return ret, nil
+       }
+       if len(ret) == 0 {
+               return nil, io.EOF
+       }
+       if count > len(ret) {
+               count = len(ret)
+       }
+       cd.dirents = cd.dirents[count:]
+       return ret[:count], nil
+}
+
+// Stat implements http.File. The reported size is the number of
+// entries not yet consumed by Readdir.
+func (cd *collectionDir) Stat() (os.FileInfo, error) {
+       return collectionDirent{
+               collection: cd.collection,
+               name:       path.Base(cd.stream),
+               isDir:      true,
+               size:       int64(len(cd.dirents)),
+       }, nil
+}
+
+// Close implements http.File. A directory handle holds no resources,
+// so there is nothing to release.
+func (cd *collectionDir) Close() error {
+       return nil
+}
+
+// Read implements http.File. A directory has no byte content, so
+// every call reports io.EOF. Returning (0, nil) instead would break
+// the io.Reader contract and make callers such as io.Copy loop
+// forever.
+func (cd *collectionDir) Read([]byte) (int, error) {
+       return 0, io.EOF
+}
+
+// Seek implements http.File. There is no content to position within,
+// so the offset is always reported as 0.
+func (cd *collectionDir) Seek(int64, int) (int64, error) {
+       return 0, nil
+}
+
+// collectionDirent implements os.FileInfo for a file or directory in
+// a collection.
+type collectionDirent struct {
+       collection *Collection
+       name       string
+       isDir      bool
+       mode       os.FileMode
+       size       int64
+}
+
+// Name implements os.FileInfo.
+func (e collectionDirent) Name() string {
+       return e.name
+}
+
+// ModTime implements os.FileInfo. Every entry reports its
+// collection's modification time, or the current time when the
+// collection does not have one.
+func (e collectionDirent) ModTime() time.Time {
+       if t := e.collection.ModifiedAt; t != nil {
+               return *t
+       }
+       return time.Now()
+}
+
+// Mode implements os.FileInfo. Entries are read-only: 0444 for files,
+// 0555 for directories. Directories also carry os.ModeDir so that
+// Mode().IsDir() agrees with IsDir(), as os.FileInfo documents.
+func (e collectionDirent) Mode() os.FileMode {
+       if e.isDir {
+               return os.ModeDir | 0555
+       }
+       return 0444
+}
+
+// IsDir implements os.FileInfo.
+func (e collectionDirent) IsDir() bool {
+       return e.isDir
+}
+
+// Size implements os.FileInfo.
+func (e collectionDirent) Size() int64 {
+       return e.size
+}
+
+// Sys implements os.FileInfo. There is no underlying data source to
+// expose, so it is always nil.
+func (e collectionDirent) Sys() interface{} {
+       return nil
+}
+
+// collectionFS implements http.FileSystem on top of a collection's
+// manifest. File content is fetched through kc.
+// NOTE(review): client is stored but not used by any code in this
+// file -- confirm whether it is needed.
+type collectionFS struct {
+       collection *Collection
+       client     *Client
+       kc         keepClient
+}
+
+// FileSystem returns an http.FileSystem for the collection. The
+// given client and keep client are stored in the returned file
+// system alongside the collection itself.
+func (c *Collection) FileSystem(client *Client, kc keepClient) http.FileSystem {
+       fs := new(collectionFS)
+       fs.collection = c
+       fs.client = client
+       fs.kc = kc
+       return fs
+}
+
+// Open implements http.FileSystem. name is cleaned and rooted at the
+// collection, so "dir1", "/dir1" and "./dir1/" all name the same
+// entry and ".." cannot climb above the root.
+//
+// A name that matches a file in the manifest yields a file handle
+// whose content is read through the keep client. The root, or a name
+// that is a path prefix of at least one file, yields a directory
+// handle listing its immediate children. Anything else fails with
+// os.ErrNotExist.
+func (c *collectionFS) Open(name string) (http.File, error) {
+       // Ensure name looks the way it does in a manifest: "." for
+       // the root and "./path" for everything else. path.Clean on a
+       // rooted path always returns a rooted path with no trailing
+       // slash (except "/" itself), so these two cases are
+       // exhaustive.
+       name = path.Clean("/" + name)
+       if name == "/" {
+               name = "."
+       } else {
+               name = "." + name
+       }
+
+       filesizes := c.fileSizes()
+
+       // Return a file if it exists.
+       if size, ok := filesizes[name]; ok {
+               m := manifest.Manifest{Text: c.collection.ManifestText}
+               reader, err := c.kc.ManifestFileReader(m, name)
+               if err != nil {
+                       return nil, err
+               }
+               return &collectionFile{
+                       File:       reader,
+                       collection: c.collection,
+                       name:       path.Base(name),
+                       size:       size,
+               }, nil
+       }
+
+       // Return a directory if it's the root dir or there are file
+       // entries below it.
+       prefix := name + "/"
+       children := map[string]collectionDirent{}
+       for fnm, size := range filesizes {
+               if !strings.HasPrefix(fnm, prefix) {
+                       continue
+               }
+               ent := fnm[len(prefix):]
+               isDir := false
+               if i := strings.Index(ent, "/"); i >= 0 {
+                       ent = ent[:i]
+                       isDir = true
+               }
+               e := children[ent]
+               e.collection = c.collection
+               e.name = ent
+               if isDir {
+                       // Several files can sit below one
+                       // subdirectory. Once an entry is known to
+                       // be a directory it stays one, and it does
+                       // not borrow the size of whichever file the
+                       // map iteration happened to visit last.
+                       e.isDir, e.size = true, 0
+               } else if !e.isDir {
+                       e.size = size
+               }
+               children[ent] = e
+       }
+       if len(children) == 0 && name != "." {
+               return nil, os.ErrNotExist
+       }
+       dirents := make([]os.FileInfo, 0, len(children))
+       for _, ent := range children {
+               dirents = append(dirents, ent)
+       }
+       return &collectionDir{
+               collection: c.collection,
+               stream:     name,
+               dirents:    dirents,
+       }, nil
+}
+
+// fileSizes returns a map of files that can be opened. Each key
+// starts with "./". A file's size is the sum of the lengths of all
+// its segments across the manifest. The result is nil when the
+// manifest has no file segments.
+func (c *collectionFS) fileSizes() map[string]int64 {
+       var total map[string]int64
+       mft := manifest.Manifest{Text: c.collection.ManifestText}
+       for stream := range mft.StreamIter() {
+               for _, seg := range stream.FileStreamSegments {
+                       if total == nil {
+                               total = make(map[string]int64)
+                       }
+                       key := stream.StreamName + "/" + seg.Name
+                       total[key] = total[key] + int64(seg.SegLen)
+               }
+       }
+       return total
+}
diff --git a/sdk/go/arvados/collection_fs_test.go b/sdk/go/arvados/collection_fs_test.go
new file mode 100644 (file)
index 0000000..8cfd21e
--- /dev/null
@@ -0,0 +1,118 @@
+package arvados
+
+import (
+       "io"
+       "net/http"
+       "os"
+       "testing"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       check "gopkg.in/check.v1"
+)
+
+// Register the suite with gocheck so that Test runs it.
+var _ = check.Suite(&CollectionFSSuite{})
+
+// CollectionFSSuite exercises the http.FileSystem returned by
+// Collection.FileSystem. SetUpTest fills in all three fields before
+// each test.
+type CollectionFSSuite struct {
+       client *Client
+       coll   Collection
+       fs     http.FileSystem
+}
+
+// SetUpTest fetches the FooAndBarFilesInDir fixture collection with a
+// client configured from the environment, then builds a file system
+// on it. The keep client is nil, so only directory operations can be
+// exercised.
+func (s *CollectionFSSuite) SetUpTest(c *check.C) {
+       s.client = NewClientFromEnv()
+       err := s.client.RequestAndDecode(&s.coll, "GET", "arvados/v1/collections/"+arvadostest.FooAndBarFilesInDirUUID, nil, nil)
+       c.Assert(err, check.IsNil)
+       s.fs = s.coll.FileSystem(s.client, nil)
+}
+
+// TestReaddirFull checks that /dir1 is a directory whose Stat size is
+// 2, and that Readdir(0) returns both entries at once with a nil
+// error.
+func (s *CollectionFSSuite) TestReaddirFull(c *check.C) {
+       f, err := s.fs.Open("/dir1")
+       c.Assert(err, check.IsNil)
+
+       st, err := f.Stat()
+       c.Assert(err, check.IsNil)
+       c.Check(st.Size(), check.Equals, int64(2))
+       c.Check(st.IsDir(), check.Equals, true)
+
+       fis, err := f.Readdir(0)
+       c.Check(err, check.IsNil)
+       c.Check(len(fis), check.Equals, 2)
+       if len(fis) > 0 {
+               // Only the first entry's size is checked.
+               c.Check(fis[0].Size(), check.Equals, int64(3))
+       }
+}
+
+// TestReaddirLimited checks Readdir with a positive count: entries
+// are handed out in batches of at most count, and io.EOF is returned
+// only once nothing is left.
+func (s *CollectionFSSuite) TestReaddirLimited(c *check.C) {
+       f, err := s.fs.Open("./dir1")
+       c.Assert(err, check.IsNil)
+       // Two entries, read one at a time.
+       for i := 0; i < 2; i++ {
+               fis, err := f.Readdir(1)
+               c.Check(err, check.IsNil)
+               c.Check(len(fis), check.Equals, 1)
+               if len(fis) > 0 {
+                       c.Check(fis[0].Size(), check.Equals, int64(3))
+               }
+       }
+       // A third read finds nothing left.
+       fis, err := f.Readdir(1)
+       c.Check(len(fis), check.Equals, 0)
+       c.Check(err, check.NotNil)
+       c.Check(err, check.Equals, io.EOF)
+
+       // A fresh handle: a count larger than what remains returns
+       // the remainder without error, and the next call hits EOF.
+       f, err = s.fs.Open("dir1")
+       c.Assert(err, check.IsNil)
+       fis, err = f.Readdir(1)
+       c.Check(len(fis), check.Equals, 1)
+       c.Assert(err, check.IsNil)
+       fis, err = f.Readdir(2)
+       c.Check(len(fis), check.Equals, 1)
+       c.Assert(err, check.IsNil)
+       fis, err = f.Readdir(2)
+       c.Check(len(fis), check.Equals, 0)
+       c.Assert(err, check.Equals, io.EOF)
+}
+
+// TestPathMunge checks that different spellings of the same path
+// open the same directory: the first group is expected to open a
+// directory with one entry, the second a directory with two.
+func (s *CollectionFSSuite) TestPathMunge(c *check.C) {
+       // Spellings of the root, including attempts to climb above it.
+       for _, path := range []string{".", "/", "./", "///", "/../", "/./.."} {
+               f, err := s.fs.Open(path)
+               c.Assert(err, check.IsNil)
+
+               st, err := f.Stat()
+               c.Assert(err, check.IsNil)
+               c.Check(st.Size(), check.Equals, int64(1))
+               c.Check(st.IsDir(), check.Equals, true)
+       }
+       // Spellings of dir1.
+       for _, path := range []string{"/dir1", "dir1", "./dir1", "///dir1//.//", "../dir1/../dir1/"} {
+               c.Logf("%q", path)
+               f, err := s.fs.Open(path)
+               c.Assert(err, check.IsNil)
+
+               st, err := f.Stat()
+               c.Assert(err, check.IsNil)
+               c.Check(st.Size(), check.Equals, int64(2))
+               c.Check(st.IsDir(), check.Equals, true)
+       }
+}
+
+// TestNotExist checks that opening each of several nonexistent paths
+// returns a nil file and an error that satisfies os.IsNotExist.
+func (s *CollectionFSSuite) TestNotExist(c *check.C) {
+       for _, path := range []string{"/no", "no", "./no", "n/o", "/n/o"} {
+               f, err := s.fs.Open(path)
+               c.Assert(f, check.IsNil)
+               c.Assert(err, check.NotNil)
+               c.Assert(os.IsNotExist(err), check.Equals, true)
+       }
+}
+
+// TestOpenFile would check that /foo.txt opens as a 3-byte file. It
+// is skipped unconditionally: the suite's file system has a nil keep
+// client, so the statements after Skip never run.
+func (s *CollectionFSSuite) TestOpenFile(c *check.C) {
+       c.Skip("cannot test files with nil keepclient")
+
+       f, err := s.fs.Open("/foo.txt")
+       c.Assert(err, check.IsNil)
+       st, err := f.Stat()
+       c.Assert(err, check.IsNil)
+       c.Check(st.Size(), check.Equals, int64(3))
+}
+
+// Gocheck boilerplate: Test is the "go test" entry point that hands
+// control to gocheck, which runs the suites registered with
+// check.Suite.
+func Test(t *testing.T) {
+       check.TestingT(t)
+}
index 299d18638a0fda75d5f170ca8609c5e1c0e5f7e1..7e21da4982b3ecb2325f3117008e15a7a8513a7d 100644 (file)
@@ -18,6 +18,9 @@ const (
        FooPdh                  = "1f4b0bc7583c2a7f9102c395f4ffc5e3+45"
        HelloWorldPdh           = "55713e6a34081eb03609e7ad5fcad129+62"
 
+       FooAndBarFilesInDirUUID = "zzzzz-4zz18-foonbarfilesdir"
+       FooAndBarFilesInDirPDH  = "6bbac24198d09a93975f60098caf0bdf+62"
+
        Dispatch1Token    = "kwi8oowusvbutahacwk2geulqewy5oaqmpalczfna4b6bb0hfw"
        Dispatch1AuthUUID = "zzzzz-gj3su-k9dvestay1plssr"
 
index 344a70c50bf09798e27bb41187fbdcb4d8d35b6e..527318eb49c67046ca86bf2183eceb75d3bcc157 100644 (file)
@@ -6,19 +6,10 @@ import (
        "io"
        "os"
 
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/manifest"
 )
 
-// A Reader implements, io.Reader, io.Seeker, and io.Closer, and has a
-// Len() method that returns the total number of bytes available to
-// read.
-type Reader interface {
-       io.Reader
-       io.Seeker
-       io.Closer
-       Len() uint64
-}
-
 const (
        // After reading a data block from Keep, cfReader slices it up
        // and sends the slices to a buffered channel to be consumed
@@ -38,7 +29,7 @@ var ErrNoManifest = errors.New("Collection has no manifest")
 // CollectionFileReader returns a Reader that reads content from a single file
 // in the collection. The filename must be relative to the root of the
 // collection.  A leading prefix of "/" or "./" in the filename is ignored.
-func (kc *KeepClient) CollectionFileReader(collection map[string]interface{}, filename string) (Reader, error) {
+func (kc *KeepClient) CollectionFileReader(collection map[string]interface{}, filename string) (arvados.File, error) {
        mText, ok := collection["manifest_text"].(string)
        if !ok {
                return nil, ErrNoManifest
@@ -47,7 +38,7 @@ func (kc *KeepClient) CollectionFileReader(collection map[string]interface{}, fi
        return kc.ManifestFileReader(m, filename)
 }
 
-func (kc *KeepClient) ManifestFileReader(m manifest.Manifest, filename string) (Reader, error) {
+func (kc *KeepClient) ManifestFileReader(m manifest.Manifest, filename string) (arvados.File, error) {
        f := &file{
                kc: kc,
        }
@@ -164,9 +155,9 @@ func (f *file) Seek(offset int64, whence int) (int64, error) {
        return f.offset, nil
 }
 
-// Len returns the file size in bytes.
-func (f *file) Len() uint64 {
-       return uint64(f.size)
+// Size returns the file size in bytes.
+func (f *file) Size() int64 {
+       return f.size
 }
 
 func (f *file) load(m manifest.Manifest, path string) error {
index aea93df1dc69970ce00d388aecfafc43e842a634..4a91401573f444598dc80582dabea0b3c9ba7231 100644 (file)
@@ -49,7 +49,7 @@ var ErrCancelled = errors.New("Cancelled")
 // IKeepClient is the minimal Keep API methods used by crunch-run.
 type IKeepClient interface {
        PutHB(hash string, buf []byte) (string, int, error)
-       ManifestFileReader(m manifest.Manifest, filename string) (keepclient.Reader, error)
+       ManifestFileReader(m manifest.Manifest, filename string) (arvados.File, error)
 }
 
 // NewLogWriter is a factory function to create a new log writer.
@@ -676,7 +676,7 @@ func (runner *ContainerRunner) AttachStreams() (err error) {
        runner.CrunchLog.Print("Attaching container streams")
 
        // If stdin mount is provided, attach it to the docker container
-       var stdinRdr keepclient.Reader
+       var stdinRdr arvados.File
        var stdinJson []byte
        if stdinMnt, ok := runner.Container.Mounts["stdin"]; ok {
                if stdinMnt.Kind == "collection" {
index 42c37b8eebf947bea060ef2ace9a68b1fcca67ad..a37cfb70b366ef0fa700caeb375c22e461cce76f 100644 (file)
@@ -4,16 +4,17 @@ import (
        "encoding/json"
        "fmt"
        "html"
+       "html/template"
        "io"
        "net/http"
        "net/url"
        "os"
-       "path"
+       "sort"
        "strconv"
        "strings"
        "sync"
-       "time"
 
+       "git.curoverse.com/arvados.git/sdk/go/arvados"
        "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
        "git.curoverse.com/arvados.git/sdk/go/auth"
        "git.curoverse.com/arvados.git/sdk/go/httpserver"
@@ -142,8 +143,8 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
 
        pathParts := strings.Split(r.URL.Path[1:], "/")
 
+       var stripParts int
        var targetID string
-       var targetPath []string
        var tokens []string
        var reqTokens []string
        var pathToken bool
@@ -160,26 +161,25 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
        if targetID = parseCollectionIDFromDNSName(r.Host); targetID != "" {
                // http://ID.collections.example/PATH...
                credentialsOK = true
-               targetPath = pathParts
        } else if r.URL.Path == "/status.json" {
                h.serveStatus(w, r)
                return
-       } else if len(pathParts) >= 2 && strings.HasPrefix(pathParts[0], "c=") {
+       } else if len(pathParts) >= 1 && strings.HasPrefix(pathParts[0], "c=") {
                // /c=ID/PATH...
                targetID = parseCollectionIDFromURL(pathParts[0][2:])
-               targetPath = pathParts[1:]
-       } else if len(pathParts) >= 3 && pathParts[0] == "collections" {
-               if len(pathParts) >= 5 && pathParts[1] == "download" {
+               stripParts = 1
+       } else if len(pathParts) >= 2 && pathParts[0] == "collections" {
+               if len(pathParts) >= 4 && pathParts[1] == "download" {
                        // /collections/download/ID/TOKEN/PATH...
                        targetID = parseCollectionIDFromURL(pathParts[2])
                        tokens = []string{pathParts[3]}
-                       targetPath = pathParts[4:]
+                       stripParts = 4
                        pathToken = true
                } else {
                        // /collections/ID/PATH...
                        targetID = parseCollectionIDFromURL(pathParts[1])
                        tokens = h.Config.AnonymousTokens
-                       targetPath = pathParts[2:]
+                       stripParts = 2
                }
        }
 
@@ -210,56 +210,12 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                // token in an HttpOnly cookie, and redirect to the
                // same URL with the query param redacted and method =
                // GET.
-
-               if !credentialsOK {
-                       // It is not safe to copy the provided token
-                       // into a cookie unless the current vhost
-                       // (origin) serves only a single collection or
-                       // we are in TrustAllContent mode.
-                       statusCode = http.StatusBadRequest
-                       return
-               }
-
-               // The HttpOnly flag is necessary to prevent
-               // JavaScript code (included in, or loaded by, a page
-               // in the collection being served) from employing the
-               // user's token beyond reading other files in the same
-               // domain, i.e., same collection.
-               //
-               // The 303 redirect is necessary in the case of a GET
-               // request to avoid exposing the token in the Location
-               // bar, and in the case of a POST request to avoid
-               // raising warnings when the user refreshes the
-               // resulting page.
-
-               http.SetCookie(w, &http.Cookie{
-                       Name:     "arvados_api_token",
-                       Value:    auth.EncodeTokenCookie([]byte(formToken)),
-                       Path:     "/",
-                       HttpOnly: true,
-               })
-
-               // Propagate query parameters (except api_token) from
-               // the original request.
-               redirQuery := r.URL.Query()
-               redirQuery.Del("api_token")
-
-               redir := (&url.URL{
-                       Host:     r.Host,
-                       Path:     r.URL.Path,
-                       RawQuery: redirQuery.Encode(),
-               }).String()
-
-               w.Header().Add("Location", redir)
-               statusCode, statusText = http.StatusSeeOther, redir
-               w.WriteHeader(statusCode)
-               io.WriteString(w, `<A href="`)
-               io.WriteString(w, html.EscapeString(redir))
-               io.WriteString(w, `">Continue</A>`)
+               h.seeOtherWithCookie(w, r, "", credentialsOK)
                return
        }
 
-       if tokens == nil && strings.HasPrefix(targetPath[0], "t=") {
+       targetPath := pathParts[stripParts:]
+       if tokens == nil && len(targetPath) > 0 && strings.HasPrefix(targetPath[0], "t=") {
                // http://ID.example/t=TOKEN/PATH...
                // /c=ID/t=TOKEN/PATH...
                //
@@ -269,6 +225,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                tokens = []string{targetPath[0][2:]}
                pathToken = true
                targetPath = targetPath[1:]
+               stripParts++
        }
 
        if tokens == nil {
@@ -286,6 +243,7 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                // //collections.example/t=foo/ won't work because
                // t=foo will be interpreted as a token "foo".
                targetPath = targetPath[1:]
+               stripParts++
        }
 
        forceReload := false
@@ -349,31 +307,126 @@ func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
                return
        }
 
-       filename := strings.Join(targetPath, "/")
        kc, err := keepclient.MakeKeepClient(arv)
        if err != nil {
                statusCode, statusText = http.StatusInternalServerError, err.Error()
                return
        }
-       rdr, err := kc.CollectionFileReader(collection, filename)
-       if os.IsNotExist(err) {
+
+       basename := targetPath[len(targetPath)-1]
+       applyContentDispositionHdr(w, r, basename, attachment)
+
+       j, err := json.Marshal(collection)
+       if err != nil {
+               panic(err)
+       }
+       var coll arvados.Collection
+       err = json.Unmarshal(j, &coll)
+       if err != nil {
+               panic(err)
+       }
+       fs := coll.FileSystem(&arvados.Client{
+               APIHost:   arv.ApiServer,
+               AuthToken: arv.ApiToken,
+               Insecure:  arv.ApiInsecure,
+       }, kc)
+       openPath := "/" + strings.Join(targetPath, "/")
+       if f, err := fs.Open(openPath); os.IsNotExist(err) {
                statusCode = http.StatusNotFound
-               return
        } else if err != nil {
-               statusCode, statusText = http.StatusBadGateway, err.Error()
-               return
+               statusCode, statusText = http.StatusInternalServerError, err.Error()
+       } else if stat, err := f.Stat(); err != nil {
+               statusCode, statusText = http.StatusInternalServerError, err.Error()
+       } else if stat.IsDir() && !strings.HasSuffix(r.URL.Path, "/") {
+               h.seeOtherWithCookie(w, r, basename+"/", credentialsOK)
+       } else if stat.IsDir() {
+               h.serveDirectory(w, r, &coll, fs, openPath)
+       } else {
+               http.ServeContent(w, r, basename, stat.ModTime(), f)
+               if int64(w.WroteBodyBytes()) != stat.Size() {
+                       n, err := f.Read(make([]byte, 1024))
+                       statusCode, statusText = http.StatusInternalServerError, fmt.Sprintf("f.Size()==%d but only wrote %d bytes; read(1024) returns %d, %s", stat.Size(), w.WroteBodyBytes(), n, err)
+
+               }
        }
-       defer rdr.Close()
+}
 
-       basename := path.Base(filename)
-       applyContentDispositionHdr(w, r, basename, attachment)
+// dirListingTemplate is the template source for directory listing
+// pages. It reads .Collection.Name, .Request.URL and .Files from the
+// data it is executed with.
+var dirListingTemplate = `<!DOCTYPE HTML>
+<HTML><HEAD><TITLE>{{ .Collection.Name }}</TITLE></HEAD>
+<BODY>
+<H1>{{ .Collection.Name }}</H1>
+
+<P>This collection of data files is being shared with you through
+Arvados.  You can download individual files listed below.  To download
+the entire collection with wget, try:</P>
+
+<PRE>$ wget --mirror --no-parent --no-host --cut-dirs=3 {{ .Request.URL }}</PRE>
+
+<H2>File Listing</H2>
+
+<UL>
+{{range .Files}}  <LI><A href="{{.}}">{{.}}</A></LI>{{end}}
+</UL>
 
-       modstr, _ := collection["modified_at"].(string)
-       modtime, err := time.Parse(time.RFC3339Nano, modstr)
+<DIV class="footer">
+  <H2>About Arvados</H2>
+  <P>
+    Arvados is a free and open source software bioinformatics platform.
+    To learn more, visit arvados.org.
+    Arvados is not responsible for the files listed on this page.
+  </P>
+</DIV>
+
+</BODY>
+`
+
+// serveDirectory writes an HTML listing of every file found at or
+// below base in fs. Subdirectories are walked recursively, and file
+// names are listed relative to base, in sorted order. If the walk or
+// the template parse fails, a 500 response carrying the error text
+// is sent instead.
+func (h *handler) serveDirectory(w http.ResponseWriter, r *http.Request, collection *arvados.Collection, fs http.FileSystem, base string) {
+       var files []string
+       var walk func(string) error
+       if !strings.HasSuffix(base, "/") {
+               base = base + "/"
+       }
+       // walk appends the files below base+path to files. path is
+       // either empty or ends in "/".
+       walk = func(path string) error {
+               dirname := base + path
+               if dirname != "/" {
+                       dirname = strings.TrimSuffix(dirname, "/")
+               }
+               d, err := fs.Open(dirname)
+               if err != nil {
+                       return err
+               }
+               // Release the directory handle once this level of
+               // the walk is finished, whatever fs implementation
+               // is behind it.
+               defer d.Close()
+               ents, err := d.Readdir(-1)
+               if err != nil {
+                       return err
+               }
+               for _, ent := range ents {
+                       if ent.IsDir() {
+                               err = walk(path + ent.Name() + "/")
+                               if err != nil {
+                                       return err
+                               }
+                       } else {
+                               files = append(files, path+ent.Name())
+                       }
+               }
+               return nil
+       }
+       if err := walk(""); err != nil {
+               http.Error(w, err.Error(), http.StatusInternalServerError)
+               return
+       }
+       tmpl, err := template.New("dir").Parse(dirListingTemplate)
         if err != nil {
-               modtime = time.Now()
+               http.Error(w, err.Error(), http.StatusInternalServerError)
+               return
         }
-       http.ServeContent(w, r, basename, modtime, rdr)
+       sort.Strings(files)
+       w.WriteHeader(http.StatusOK)
+       tmpl.Execute(w, map[string]interface{}{
+               "Collection": collection,
+               "Files":      files,
+               "Request":    r,
+       })
 }
 
 func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename string, isAttachment bool) {
@@ -393,3 +446,61 @@ func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename
                w.Header().Set("Content-Disposition", disposition)
        }
 }
+
+// seeOtherWithCookie responds with a 303 See Other redirect to
+// location, resolved relative to the request URL, or to the request
+// URL itself when location is empty. Query parameters other than
+// api_token are carried over.
+//
+// If the request supplies an api_token form value, the token is moved
+// into an HttpOnly cookie on the way. That is permitted only when
+// credentialsOK is true; otherwise the response is 400 Bad Request.
+// A request without an api_token is redirected regardless of
+// credentialsOK, because no credential is being stored.
+func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, location string, credentialsOK bool) {
+       if formToken := r.FormValue("api_token"); formToken != "" {
+               if !credentialsOK {
+                       // It is not safe to copy the provided token
+                       // into a cookie unless the current vhost
+                       // (origin) serves only a single collection or
+                       // we are in TrustAllContent mode.
+                       w.WriteHeader(http.StatusBadRequest)
+                       return
+               }
+
+               // The HttpOnly flag is necessary to prevent
+               // JavaScript code (included in, or loaded by, a page
+               // in the collection being served) from employing the
+               // user's token beyond reading other files in the same
+               // domain, i.e., same collection.
+               //
+               // The 303 redirect is necessary in the case of a GET
+               // request to avoid exposing the token in the Location
+               // bar, and in the case of a POST request to avoid
+               // raising warnings when the user refreshes the
+               // resulting page.
+
+               http.SetCookie(w, &http.Cookie{
+                       Name:     "arvados_api_token",
+                       Value:    auth.EncodeTokenCookie([]byte(formToken)),
+                       Path:     "/",
+                       HttpOnly: true,
+               })
+       }
+
+       // Propagate query parameters (except api_token) from
+       // the original request.
+       redirQuery := r.URL.Query()
+       redirQuery.Del("api_token")
+
+       u := r.URL
+       if location != "" {
+               newu, err := u.Parse(location)
+               if err != nil {
+                       w.WriteHeader(http.StatusInternalServerError)
+                       return
+               }
+               u = newu
+       }
+       redir := (&url.URL{
+               Host:     r.Host,
+               Path:     u.Path,
+               RawQuery: redirQuery.Encode(),
+       }).String()
+
+       w.Header().Add("Location", redir)
+       w.WriteHeader(http.StatusSeeOther)
+       io.WriteString(w, `<A href="`)
+       io.WriteString(w, html.EscapeString(redir))
+       io.WriteString(w, `">Continue</A>`)
+}
index df0346ba315f420ce5aae5dd2d3a59c06e4e87fb..508c9cb21e382f115ec0b5422f342a42614b7bbd 100644 (file)
@@ -479,3 +479,98 @@ func (s *IntegrationSuite) testVhostRedirectTokenToCookie(c *check.C, method, ho
        c.Check(resp.Header().Get("Location"), check.Equals, "")
        return resp
 }
+
+// TestDirectoryListing requests directory URLs in each supported URL
+// style, follows any 303 redirects, and checks that the final page
+// links to every expected file. A trial with a nil expect list is
+// expected to end in 404 instead.
+func (s *IntegrationSuite) TestDirectoryListing(c *check.C) {
+       s.testServer.Config.AttachmentOnlyHost = "download.example.com"
+       authHeader := http.Header{
+               "Authorization": {"OAuth2 " + arvadostest.ActiveToken},
+       }
+       for _, trial := range []struct {
+               uri    string
+               header http.Header
+               expect []string
+       }{
+               {
+                       uri:    strings.Replace(arvadostest.FooAndBarFilesInDirPDH, "+", "-", -1) + ".example.com/",
+                       header: authHeader,
+                       expect: []string{"dir1/foo", "dir1/bar"},
+               },
+               {
+                       uri:    strings.Replace(arvadostest.FooAndBarFilesInDirPDH, "+", "-", -1) + ".example.com/dir1/",
+                       header: authHeader,
+                       expect: []string{"foo", "bar"},
+               },
+               {
+                       uri:    "download.example.com/collections/" + arvadostest.FooAndBarFilesInDirUUID + "/",
+                       header: authHeader,
+                       expect: []string{"dir1/foo", "dir1/bar"},
+               },
+               {
+                       uri:    "collections.example.com/collections/download/" + arvadostest.FooAndBarFilesInDirUUID + "/" + arvadostest.ActiveToken + "/",
+                       header: nil,
+                       expect: []string{"dir1/foo", "dir1/bar"},
+               },
+               {
+                       uri:    "collections.example.com/c=" + arvadostest.FooAndBarFilesInDirUUID + "/t=" + arvadostest.ActiveToken + "/",
+                       header: nil,
+                       expect: []string{"dir1/foo", "dir1/bar"},
+               },
+               {
+                       uri:    "download.example.com/c=" + arvadostest.FooAndBarFilesInDirUUID + "/dir1/",
+                       header: authHeader,
+                       expect: []string{"foo", "bar"},
+               },
+               {
+                       uri:    "download.example.com/c=" + arvadostest.FooAndBarFilesInDirUUID + "/_/dir1/",
+                       header: authHeader,
+                       expect: []string{"foo", "bar"},
+               },
+               {
+                       uri:    arvadostest.FooAndBarFilesInDirUUID + ".example.com/dir1?api_token=" + arvadostest.ActiveToken,
+                       header: authHeader,
+                       expect: []string{"foo", "bar"},
+               },
+               {
+                       uri:    "collections.example.com/c=" + arvadostest.FooAndBarFilesInDirUUID + "/theperthcountyconspiracydoesnotexist/",
+                       header: authHeader,
+                       expect: nil,
+               },
+       } {
+               c.Logf("%q => %q", trial.uri, trial.expect)
+               resp := httptest.NewRecorder()
+               u := mustParseURL("//" + trial.uri)
+               req := &http.Request{
+                       Method:     "GET",
+                       Host:       u.Host,
+                       URL:        u,
+                       RequestURI: u.RequestURI(),
+                       Header:     trial.header,
+               }
+               s.testServer.Handler.ServeHTTP(resp, req)
+               var cookies []*http.Cookie
+               // Follow 303 redirects. Each follow-up request starts
+               // with an empty header and carries every cookie set
+               // by the responses seen so far.
+               for resp.Code == http.StatusSeeOther {
+                       u, _ := req.URL.Parse(resp.Header().Get("Location"))
+                       req = &http.Request{
+                               Method:     "GET",
+                               Host:       u.Host,
+                               URL:        u,
+                               RequestURI: u.RequestURI(),
+                               Header:     http.Header{},
+                       }
+                       cookies = append(cookies, (&http.Response{Header: resp.Header()}).Cookies()...)
+                       // NOTE(review): this c shadows the *check.C
+                       // parameter for the duration of the loop.
+                       for _, c := range cookies {
+                               req.AddCookie(c)
+                       }
+                       resp = httptest.NewRecorder()
+                       s.testServer.Handler.ServeHTTP(resp, req)
+               }
+               if trial.expect == nil {
+                       c.Check(resp.Code, check.Equals, http.StatusNotFound)
+               } else {
+                       c.Check(resp.Code, check.Equals, http.StatusOK)
+                       for _, e := range trial.expect {
+                               c.Check(resp.Body.String(), check.Matches, `(?ms).*href="`+e+`".*`)
+                       }
+               }
+       }
+}
index 52fe459ec43ff13c422c1679017de68246a36d1d..500561d69c74a41d18f923c982a7b8397030bf67 100644 (file)
@@ -77,7 +77,9 @@ func (s *IntegrationSuite) Test404(c *check.C) {
        } {
                hdr, body, _ := s.runCurl(c, arvadostest.ActiveToken, "collections.example.com", uri)
                c.Check(hdr, check.Matches, "(?s)HTTP/1.1 404 Not Found\r\n.*")
-               c.Check(body, check.Equals, "")
+               if len(body) > 0 {
+                       c.Check(body, check.Equals, "404 page not found\n")
+               }
        }
 }