+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
package main
import (
+ "encoding/json"
"fmt"
"html"
+ "html/template"
"io"
"net/http"
"net/url"
"os"
- "path"
+ "sort"
"strconv"
"strings"
"sync"
- "time"
+ "git.curoverse.com/arvados.git/sdk/go/arvados"
"git.curoverse.com/arvados.git/sdk/go/arvadosclient"
"git.curoverse.com/arvados.git/sdk/go/auth"
+ "git.curoverse.com/arvados.git/sdk/go/health"
"git.curoverse.com/arvados.git/sdk/go/httpserver"
"git.curoverse.com/arvados.git/sdk/go/keepclient"
+ "golang.org/x/net/webdav"
)
+// handler is the type whose ServeHTTP method implements http.Handler
+// for this service. clientPool, healthHandler and webdavLS are
+// initialized by setup().
type handler struct {
- Config *Config
- clientPool *arvadosclient.ClientPool
- setupOnce sync.Once
+ // Config is read for ManagementToken, Cache and AnonymousTokens.
+ Config *Config
+ // clientPool is the pool ServeHTTP takes an API client from.
+ clientPool *arvadosclient.ClientPool
+ // setupOnce guards setup(); ServeHTTP calls setupOnce.Do(h.setup).
+ setupOnce sync.Once
+ // healthHandler serves GET requests whose path starts with "/_health/".
+ healthHandler http.Handler
+ // webdavLS is the LockSystem given to each webdav.Handler that
+ // ServeHTTP builds.
+ webdavLS webdav.LockSystem
}
// parseCollectionIDFromDNSName returns a UUID or PDH if s begins with
+// setup initializes the handler's client pool, health handler and
+// webdav lock system. ServeHTTP runs it through h.setupOnce, so it
+// executes once per handler.
func (h *handler) setup() {
h.clientPool = arvadosclient.MakeClientPool()
+
keepclient.RefreshServiceDiscoveryOnSIGHUP()
+
+ // Health checks are authenticated with the configured management
+ // token and served under the "/_health/" prefix.
+ h.healthHandler = &health.Handler{
+ Token: h.Config.ManagementToken,
+ Prefix: "/_health/",
+ }
+
+ // Even though we don't accept LOCK requests, every webdav
+ // handler must have a non-nil LockSystem.
+ h.webdavLS = &noLockSystem{}
+}
+
+// serveStatus responds with a JSON document containing the cache
+// statistics returned by h.Config.Cache.Stats().
+//
+// The document is marshalled completely before anything is written,
+// so an encoding failure produces a 500 response rather than a
+// truncated 200. The body keeps the trailing newline that
+// json.Encoder would have emitted.
+func (h *handler) serveStatus(w http.ResponseWriter, r *http.Request) {
+	status := struct {
+		cacheStats
+	}{
+		cacheStats: h.Config.Cache.Stats(),
+	}
+	buf, err := json.Marshal(status)
+	if err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+	w.Header().Set("Content-Type", "application/json")
+	// A write error here means the client went away; there is
+	// nobody left to report it to.
+	w.Write(append(buf, '\n'))
}
+// browserMethod is the set of HTTP methods issued by ordinary web
+// browsers that this handler accepts.
+var browserMethod = map[string]bool{
+	"POST": true,
+	"HEAD": true,
+	"GET":  true,
+}
+
+// webdavMethod is the set of WebDAV-related HTTP methods that this
+// handler accepts.
+var webdavMethod = map[string]bool{
+	"PROPFIND": true,
+	"OPTIONS":  true,
+}
+
// ServeHTTP implements http.Handler.
func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
h.setupOnce.Do(h.setup)
httpserver.Log(remoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.Host, r.URL.Path, r.URL.RawQuery)
}()
- if r.Method == "OPTIONS" {
- method := r.Header.Get("Access-Control-Request-Method")
- if method != "GET" && method != "POST" {
+ if strings.HasPrefix(r.URL.Path, "/_health/") && r.Method == "GET" {
+ h.healthHandler.ServeHTTP(w, r)
+ return
+ }
+
+ if method := r.Header.Get("Access-Control-Request-Method"); method != "" && r.Method == "OPTIONS" {
+ if !browserMethod[method] && !webdavMethod[method] {
statusCode = http.StatusMethodNotAllowed
return
}
- w.Header().Set("Access-Control-Allow-Headers", "Range")
- w.Header().Set("Access-Control-Allow-Methods", "GET, POST")
+ w.Header().Set("Access-Control-Allow-Headers", "Authorization, Content-Type, Range")
+ w.Header().Set("Access-Control-Allow-Methods", "GET, POST, OPTIONS, PROPFIND")
w.Header().Set("Access-Control-Allow-Origin", "*")
w.Header().Set("Access-Control-Max-Age", "86400")
statusCode = http.StatusOK
return
}
- if r.Method != "GET" && r.Method != "POST" {
+ if !browserMethod[r.Method] && !webdavMethod[r.Method] {
statusCode, statusText = http.StatusMethodNotAllowed, r.Method
return
}
// SSL certificates. See
// http://www.w3.org/TR/cors/#user-credentials).
w.Header().Set("Access-Control-Allow-Origin", "*")
+ w.Header().Set("Access-Control-Expose-Headers", "Content-Range")
}
arv := h.clientPool.Get()
pathParts := strings.Split(r.URL.Path[1:], "/")
+ var stripParts int
var targetID string
- var targetPath []string
var tokens []string
var reqTokens []string
var pathToken bool
if targetID = parseCollectionIDFromDNSName(r.Host); targetID != "" {
// http://ID.collections.example/PATH...
credentialsOK = true
- targetPath = pathParts
- } else if len(pathParts) >= 2 && strings.HasPrefix(pathParts[0], "c=") {
- // /c=ID/PATH...
+ } else if r.URL.Path == "/status.json" {
+ h.serveStatus(w, r)
+ return
+ } else if len(pathParts) >= 1 && strings.HasPrefix(pathParts[0], "c=") {
+ // /c=ID[/PATH...]
targetID = parseCollectionIDFromURL(pathParts[0][2:])
- targetPath = pathParts[1:]
- } else if len(pathParts) >= 3 && pathParts[0] == "collections" {
- if len(pathParts) >= 5 && pathParts[1] == "download" {
+ stripParts = 1
+ } else if len(pathParts) >= 2 && pathParts[0] == "collections" {
+ if len(pathParts) >= 4 && pathParts[1] == "download" {
// /collections/download/ID/TOKEN/PATH...
targetID = parseCollectionIDFromURL(pathParts[2])
tokens = []string{pathParts[3]}
- targetPath = pathParts[4:]
+ stripParts = 4
pathToken = true
} else {
// /collections/ID/PATH...
targetID = parseCollectionIDFromURL(pathParts[1])
tokens = h.Config.AnonymousTokens
- targetPath = pathParts[2:]
+ stripParts = 2
}
}
// * The token isn't embedded in the URL, so we don't
// need to worry about bookmarks and copy/paste.
tokens = append(tokens, formToken)
- } else if formToken != "" {
+ } else if formToken != "" && browserMethod[r.Method] {
// The client provided an explicit token in the query
// string, or a form in POST body. We must put the
// token in an HttpOnly cookie, and redirect to the
// same URL with the query param redacted and method =
// GET.
-
- if !credentialsOK {
- // It is not safe to copy the provided token
- // into a cookie unless the current vhost
- // (origin) serves only a single collection or
- // we are in TrustAllContent mode.
- statusCode = http.StatusBadRequest
- return
- }
-
- // The HttpOnly flag is necessary to prevent
- // JavaScript code (included in, or loaded by, a page
- // in the collection being served) from employing the
- // user's token beyond reading other files in the same
- // domain, i.e., same collection.
- //
- // The 303 redirect is necessary in the case of a GET
- // request to avoid exposing the token in the Location
- // bar, and in the case of a POST request to avoid
- // raising warnings when the user refreshes the
- // resulting page.
-
- http.SetCookie(w, &http.Cookie{
- Name: "arvados_api_token",
- Value: auth.EncodeTokenCookie([]byte(formToken)),
- Path: "/",
- HttpOnly: true,
- })
-
- // Propagate query parameters (except api_token) from
- // the original request.
- redirQuery := r.URL.Query()
- redirQuery.Del("api_token")
-
- redir := (&url.URL{
- Host: r.Host,
- Path: r.URL.Path,
- RawQuery: redirQuery.Encode(),
- }).String()
-
- w.Header().Add("Location", redir)
- statusCode, statusText = http.StatusSeeOther, redir
- w.WriteHeader(statusCode)
- io.WriteString(w, `<A href="`)
- io.WriteString(w, html.EscapeString(redir))
- io.WriteString(w, `">Continue</A>`)
+ h.seeOtherWithCookie(w, r, "", credentialsOK)
return
}
- if tokens == nil && strings.HasPrefix(targetPath[0], "t=") {
+ targetPath := pathParts[stripParts:]
+ if tokens == nil && len(targetPath) > 0 && strings.HasPrefix(targetPath[0], "t=") {
// http://ID.example/t=TOKEN/PATH...
// /c=ID/t=TOKEN/PATH...
//
tokens = []string{targetPath[0][2:]}
pathToken = true
targetPath = targetPath[1:]
+ stripParts++
}
if tokens == nil {
// //collections.example/t=foo/ won't work because
// t=foo will be interpreted as a token "foo".
targetPath = targetPath[1:]
+ stripParts++
}
+ forceReload := false
+ if cc := r.Header.Get("Cache-Control"); strings.Contains(cc, "no-cache") || strings.Contains(cc, "must-revalidate") {
+ forceReload = true
+ }
+
+ var collection *arvados.Collection
tokenResult := make(map[string]int)
- collection := make(map[string]interface{})
- found := false
for _, arv.ApiToken = range tokens {
- err := arv.Get("collections", targetID, nil, &collection)
+ var err error
+ collection, err = h.Config.Cache.Get(arv, targetID, forceReload)
if err == nil {
// Success
- found = true
break
}
if srvErr, ok := err.(arvadosclient.APIServerError); ok {
statusCode, statusText = http.StatusInternalServerError, err.Error()
return
}
- if !found {
+ if collection == nil {
if pathToken || !credentialsOK {
// Either the URL is a "secret sharing link"
// that didn't work out (and asking the client
return
}
- filename := strings.Join(targetPath, "/")
kc, err := keepclient.MakeKeepClient(arv)
if err != nil {
statusCode, statusText = http.StatusInternalServerError, err.Error()
return
}
- rdr, err := kc.CollectionFileReader(collection, filename)
- if os.IsNotExist(err) {
- statusCode = http.StatusNotFound
+
+ var basename string
+ if len(targetPath) > 0 {
+ basename = targetPath[len(targetPath)-1]
+ }
+ applyContentDispositionHdr(w, r, basename, attachment)
+
+ fs := collection.FileSystem(&arvados.Client{
+ APIHost: arv.ApiServer,
+ AuthToken: arv.ApiToken,
+ Insecure: arv.ApiInsecure,
+ }, kc)
+ if webdavMethod[r.Method] {
+ h := webdav.Handler{
+ Prefix: "/" + strings.Join(pathParts[:stripParts], "/"),
+ FileSystem: &webdavFS{collfs: fs},
+ LockSystem: h.webdavLS,
+ Logger: func(_ *http.Request, err error) {
+ if os.IsNotExist(err) {
+ statusCode, statusText = http.StatusNotFound, err.Error()
+ } else if err != nil {
+ statusCode, statusText = http.StatusInternalServerError, err.Error()
+ }
+ },
+ }
+ h.ServeHTTP(w, r)
return
+ }
+
+ openPath := "/" + strings.Join(targetPath, "/")
+ if f, err := fs.Open(openPath); os.IsNotExist(err) {
+ // Requested non-existent path
+ statusCode = http.StatusNotFound
} else if err != nil {
- statusCode, statusText = http.StatusBadGateway, err.Error()
- return
+ // Some other (unexpected) error
+ statusCode, statusText = http.StatusInternalServerError, err.Error()
+ } else if stat, err := f.Stat(); err != nil {
+ // Can't get Size/IsDir (shouldn't happen with a collectionFS!)
+ statusCode, statusText = http.StatusInternalServerError, err.Error()
+ } else if stat.IsDir() && !strings.HasSuffix(r.URL.Path, "/") {
+ // If client requests ".../dirname", redirect to
+ // ".../dirname/". This way, relative links in the
+ // listing for "dirname" can always be "fnm", never
+ // "dirname/fnm".
+ h.seeOtherWithCookie(w, r, r.URL.Path+"/", credentialsOK)
+ } else if stat.IsDir() {
+ h.serveDirectory(w, r, collection.Name, fs, openPath, stripParts)
+ } else {
+ http.ServeContent(w, r, basename, stat.ModTime(), f)
+ if r.Header.Get("Range") == "" && int64(w.WroteBodyBytes()) != stat.Size() {
+ // If we wrote fewer bytes than expected, it's
+ // too late to change the real response code
+ // or send an error message to the client, but
+ // at least we can try to put some useful
+ // debugging info in the logs.
+ n, err := f.Read(make([]byte, 1024))
+ statusCode, statusText = http.StatusInternalServerError, fmt.Sprintf("f.Size()==%d but only wrote %d bytes; read(1024) returns %d, %s", stat.Size(), w.WroteBodyBytes(), n, err)
+
+ }
}
- defer rdr.Close()
+}
- basename := path.Base(filename)
- applyContentDispositionHdr(w, r, basename, attachment)
+// dirListingTemplate is the html/template source for a directory
+// listing page. It is executed with a map providing the keys
+// "CollectionName", "Files" (entries with Name and Size), "Request"
+// and "StripParts", and requires a template function named "nbsp".
+var dirListingTemplate = `<!DOCTYPE HTML>
+<HTML><HEAD>
+ <META name="robots" content="NOINDEX">
+ <TITLE>{{ .CollectionName }}</TITLE>
+ <STYLE type="text/css">
+ body {
+ margin: 1.5em;
+ }
+ pre {
+ background-color: #D9EDF7;
+ border-radius: .25em;
+ padding: .75em;
+ overflow: auto;
+ }
+ .footer p {
+ font-size: 82%;
+ }
+ ul {
+ padding: 0;
+ }
+ ul li {
+ font-family: monospace;
+ list-style: none;
+ }
+ </STYLE>
+</HEAD>
+<BODY>
+<H1>{{ .CollectionName }}</H1>
+
+<P>This collection of data files is being shared with you through
+Arvados. You can download individual files listed below. To download
+the entire collection with wget, try:</P>
+
+<PRE>$ wget --mirror --no-parent --no-host --cut-dirs={{ .StripParts }} https://{{ .Request.Host }}{{ .Request.URL }}</PRE>
+
+<H2>File Listing</H2>
+
+{{if .Files}}
+<UL>
+{{range .Files}} <LI>{{.Size | printf "%15d " | nbsp}}<A href="{{.Name}}">{{.Name}}</A></LI>{{end}}
+</UL>
+{{else}}
+<P>(No files; this collection is empty.)</P>
+{{end}}
+
+<HR noshade>
+<DIV class="footer">
+ <P>
+ About Arvados:
+ Arvados is a free and open source software bioinformatics platform.
+ To learn more, visit arvados.org.
+ Arvados is not responsible for the files listed on this page.
+ </P>
+</DIV>
+
+</BODY>
+`
+
+// fileListEnt is one row of a directory listing, as built by
+// serveDirectory and rendered by dirListingTemplate.
+type fileListEnt struct {
+ // Name is the file's path relative to the directory being listed.
+ Name string
+ // Size is the size reported by the file's os.FileInfo.
+ Size int64
+}
- modstr, _ := collection["modified_at"].(string)
- modtime, err := time.Parse(time.RFC3339Nano, modstr)
+// serveDirectory writes an HTML listing of every regular file found
+// at or below the directory base in fs.
+//
+// The tree is walked recursively; each non-directory entry is
+// recorded with its path relative to base and its size. Entries are
+// sorted by name and rendered with dirListingTemplate, which also
+// receives collectionName, the request r and stripParts. If the walk
+// or the template parse fails, a 500 response is sent and nothing is
+// listed.
+func (h *handler) serveDirectory(w http.ResponseWriter, r *http.Request, collectionName string, fs http.FileSystem, base string, stripParts int) {
+	var files []fileListEnt
+	var walk func(string) error
+	if !strings.HasSuffix(base, "/") {
+		base = base + "/"
+	}
+	walk = func(path string) error {
+		// Open wants no trailing slash, except for the root itself.
+		dirname := base + path
+		if dirname != "/" {
+			dirname = strings.TrimSuffix(dirname, "/")
+		}
+		d, err := fs.Open(dirname)
+		if err != nil {
+			return err
+		}
+		ents, err := d.Readdir(-1)
+		// Release the directory handle before recursing, so a deep
+		// tree does not keep every ancestor open.
+		d.Close()
+		if err != nil {
+			return err
+		}
+		for _, ent := range ents {
+			if ent.IsDir() {
+				err = walk(path + ent.Name() + "/")
+				if err != nil {
+					return err
+				}
+			} else {
+				files = append(files, fileListEnt{
+					Name: path + ent.Name(),
+					Size: ent.Size(),
+				})
+			}
+		}
+		return nil
+	}
+	if err := walk(""); err != nil {
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
+	}
+
+	funcs := template.FuncMap{
+		// nbsp keeps the padded size column aligned by turning
+		// spaces into non-breaking space entities. Its input is
+		// the output of printf "%15d ", so marking the result as
+		// trusted HTML is safe.
+		"nbsp": func(s string) template.HTML {
+			return template.HTML(strings.Replace(s, " ", "&nbsp;", -1))
+		},
+	}
+	tmpl, err := template.New("dir").Funcs(funcs).Parse(dirListingTemplate)
if err != nil {
- modtime = time.Now()
+		http.Error(w, err.Error(), http.StatusInternalServerError)
+		return
}
- http.ServeContent(w, r, basename, modtime, rdr)
+	sort.Slice(files, func(i, j int) bool {
+		return files[i].Name < files[j].Name
+	})
+	w.WriteHeader(http.StatusOK)
+	// The status line has already been sent, so an execution error
+	// can no longer be turned into an error response.
+	tmpl.Execute(w, map[string]interface{}{
+		"CollectionName": collectionName,
+		"Files":          files,
+		"Request":        r,
+		"StripParts":     stripParts,
+	})
}
func applyContentDispositionHdr(w http.ResponseWriter, r *http.Request, filename string, isAttachment bool) {
w.Header().Set("Content-Disposition", disposition)
}
}
+
+// seeOtherWithCookie answers the request with a 303 See Other
+// redirect. The target keeps the request's host and query string
+// (minus api_token); its path is the request path, or the path that
+// results from resolving location against the request URL when
+// location is non-empty.
+//
+// If the request carries an api_token form value, the token is moved
+// into an HttpOnly cookie first. That is refused with 400 Bad Request
+// unless credentialsOK is true. A location that cannot be parsed
+// yields 500 Internal Server Error.
+func (h *handler) seeOtherWithCookie(w http.ResponseWriter, r *http.Request, location string, credentialsOK bool) {
+	token := r.FormValue("api_token")
+	if token != "" && !credentialsOK {
+		// It is not safe to copy the provided token into a
+		// cookie unless the current vhost (origin) serves
+		// only a single collection or we are in
+		// TrustAllContent mode.
+		w.WriteHeader(http.StatusBadRequest)
+		return
+	}
+	if token != "" {
+		// HttpOnly prevents JavaScript code (included in, or
+		// loaded by, a page in the collection being served)
+		// from employing the user's token beyond reading
+		// other files in the same domain, i.e., same
+		// collection.
+		//
+		// The 303 redirect avoids exposing the token in the
+		// Location bar after a GET, and avoids warnings when
+		// the user refreshes the page resulting from a POST.
+		http.SetCookie(w, &http.Cookie{
+			Name:     "arvados_api_token",
+			Value:    auth.EncodeTokenCookie([]byte(token)),
+			Path:     "/",
+			HttpOnly: true,
+		})
+	}
+
+	target := r.URL
+	if location != "" {
+		var err error
+		if target, err = r.URL.Parse(location); err != nil {
+			w.WriteHeader(http.StatusInternalServerError)
+			return
+		}
+	}
+
+	// Carry over the original query parameters, except api_token.
+	query := r.URL.Query()
+	query.Del("api_token")
+	dest := url.URL{
+		Host:     r.Host,
+		Path:     target.Path,
+		RawQuery: query.Encode(),
+	}
+	redir := dest.String()
+
+	w.Header().Add("Location", redir)
+	w.WriteHeader(http.StatusSeeOther)
+	for _, chunk := range []string{`<A href="`, html.EscapeString(redir), `">Continue</A>`} {
+		io.WriteString(w, chunk)
+	}
+}