5824: Add keepdl.
authorTom Clegg <tom@curoverse.com>
Thu, 23 Jul 2015 04:02:11 +0000 (00:02 -0400)
committerTom Clegg <tom@curoverse.com>
Thu, 15 Oct 2015 21:06:10 +0000 (17:06 -0400)
services/keep-web/.gitignore [new file with mode: 0644]
services/keep-web/handler.go [new file with mode: 0644]
services/keep-web/main.go [new file with mode: 0644]
services/keep-web/server.go [new file with mode: 0644]
services/keep-web/server_test.go [new file with mode: 0644]

diff --git a/services/keep-web/.gitignore b/services/keep-web/.gitignore
new file mode 100644 (file)
index 0000000..173e306
--- /dev/null
@@ -0,0 +1 @@
+keepdl
diff --git a/services/keep-web/handler.go b/services/keep-web/handler.go
new file mode 100644 (file)
index 0000000..bbcd53c
--- /dev/null
@@ -0,0 +1,153 @@
+package main
+
+import (
+       "fmt"
+       "io"
+       "net/http"
+       "os"
+       "strings"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+       "git.curoverse.com/arvados.git/sdk/go/auth"
+       "git.curoverse.com/arvados.git/sdk/go/httpserver"
+)
+
+// clientPool supplies the API client that ServeHTTP checks out with
+// Get and returns with Put for each request.
+var clientPool = arvadosclient.MakeClientPool()
+
+// anonymousTokens are tried, after any tokens supplied with the
+// request, when looking up a collection via the
+// "/collections/{id}/path..." URL form.
+var anonymousTokens []string
+
+// handler is the http.Handler for the whole service. It carries no
+// state of its own.
+type handler struct{}
+
+// init starts with an empty (non-nil) list of anonymous tokens.
+func init() {
+       // TODO(TC): Get anonymousTokens from flags
+       anonymousTokens = []string{}
+}
+
+// ServeHTTP looks up a collection and streams one of its files to
+// the client.
+//
+// Two URL forms are recognized; everything else is answered with
+// 404:
+//
+//     /collections/{id}/path...
+//             Tokens come from the request's credentials, followed
+//             by the configured anonymousTokens.
+//     /collections/download/{id}/{token}/path...
+//             Only the token embedded in the path is used.
+//
+// Each token is tried in turn until the collection lookup succeeds.
+// If none succeeds, the response is 404 for the path-token form or
+// when one of the client's own tokens got a 404 from the API server;
+// otherwise it is 401 with a WWW-Authenticate header. Errors reading
+// the file are reported as 404, 501 or 502.
+//
+// Code paths report failure by setting statusCode (and optionally
+// statusText) and returning; the deferred function below sends the
+// status line, if it has not been sent yet, and logs the request.
+func (h *handler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
+       var statusCode int
+       var statusText string
+
+       w := httpserver.WrapResponseWriter(wOrig)
+       defer func() {
+               switch {
+               case statusCode == 0:
+                       // Success path: nothing set statusCode, so
+                       // log the status that was really sent. If
+                       // nothing was written at all, net/http sends
+                       // 200 when the handler returns.
+                       statusCode = w.WroteStatus()
+                       if statusCode == 0 {
+                               statusCode = http.StatusOK
+                       }
+               case w.WroteStatus() == 0:
+                       w.WriteHeader(statusCode)
+               default:
+                       // Too late to change the status line (e.g.,
+                       // the copy failed part way through the body).
+                       httpserver.Log(r.RemoteAddr, "WARNING",
+                               fmt.Sprintf("Our status changed from %d to %d after we sent headers", w.WroteStatus(), statusCode))
+               }
+               if statusText == "" {
+                       statusText = http.StatusText(statusCode)
+               }
+               // NOTE(review): for the "download" URL form,
+               // r.URL.Path contains the token, so it ends up in the
+               // log -- confirm that this is acceptable.
+               httpserver.Log(r.RemoteAddr, statusCode, statusText, w.WroteBodyBytes(), r.Method, r.URL.Path)
+       }()
+
+       arv := clientPool.Get()
+       if arv == nil {
+               statusCode, statusText = http.StatusInternalServerError, "Pool failed: "+clientPool.Err().Error()
+               return
+       }
+       defer clientPool.Put(arv)
+
+       // TrimPrefix instead of Path[1:], so an empty path yields a
+       // 404 below rather than an index-out-of-range panic.
+       pathParts := strings.Split(strings.TrimPrefix(r.URL.Path, "/"), "/")
+
+       if len(pathParts) < 3 || pathParts[0] != "collections" || pathParts[1] == "" || pathParts[2] == "" {
+               statusCode = http.StatusNotFound
+               return
+       }
+
+       var targetID string
+       var targetPath []string
+       var tokens []string
+       var reqTokens []string
+       var pathToken bool
+       if len(pathParts) >= 5 && pathParts[1] == "download" {
+               // "/collections/download/{id}/{token}/path..." form:
+               // Don't use our configured anonymous tokens,
+               // Authorization headers, etc.  Just use the token in
+               // the path.
+               targetID = pathParts[2]
+               tokens = []string{pathParts[3]}
+               targetPath = pathParts[4:]
+               pathToken = true
+       } else {
+               // "/collections/{id}/path..." form
+               targetID = pathParts[1]
+               reqTokens = auth.NewCredentialsFromHTTPRequest(r).Tokens
+               tokens = append(reqTokens, anonymousTokens...)
+               targetPath = pathParts[2:]
+       }
+
+       // tokenResult remembers the API server's answer (404 or 401)
+       // for each token that failed to retrieve the collection.
+       tokenResult := make(map[string]int)
+       collection := make(map[string]interface{})
+       found := false
+       // The range clause assigns each candidate token directly to
+       // the client before the lookup is attempted.
+       for _, arv.ApiToken = range tokens {
+               err := arv.Get("collections", targetID, nil, &collection)
+               if err == nil {
+                       // Success
+                       found = true
+                       break
+               }
+               httpserver.Log(err)
+               if srvErr, ok := err.(arvadosclient.APIServerError); ok {
+                       switch srvErr.HttpStatusCode {
+                       case http.StatusNotFound, http.StatusUnauthorized:
+                               // Token broken or insufficient to
+                               // retrieve collection
+                               tokenResult[arv.ApiToken] = srvErr.HttpStatusCode
+                               continue
+                       }
+               }
+               // Something more serious is wrong
+               statusCode, statusText = http.StatusInternalServerError, err.Error()
+               return
+       }
+       if !found {
+               if pathToken {
+                       // The URL is a "secret sharing link", but it
+                       // didn't work out. Asking the client for
+                       // additional credentials would just be
+                       // confusing.
+                       statusCode = http.StatusNotFound
+                       return
+               }
+               for _, t := range reqTokens {
+                       if tokenResult[t] == http.StatusNotFound {
+                               // The client provided valid token(s), but the
+                               // collection was not found.
+                               statusCode = http.StatusNotFound
+                               return
+                       }
+               }
+               // The client's token was invalid (e.g., expired), or
+               // the client didn't even provide one.  Propagate the
+               // 401 to encourage the client to use a [different]
+               // token.
+               //
+               // TODO(TC): This response would be confusing to
+               // someone trying (anonymously) to download public
+               // data that has been deleted.  Allow a referrer to
+               // provide this context somehow?
+               statusCode = http.StatusUnauthorized
+               w.Header().Add("WWW-Authenticate", "Basic realm=\"dl\"")
+               return
+       }
+
+       filename := strings.Join(targetPath, "/")
+       rdr, err := arvadosclient.CollectionFileReader(collection, filename)
+       if os.IsNotExist(err) {
+               statusCode = http.StatusNotFound
+               return
+       } else if err == arvadosclient.ErrNotImplemented {
+               statusCode = http.StatusNotImplemented
+               return
+       } else if err != nil {
+               statusCode, statusText = http.StatusBadGateway, err.Error()
+               return
+       }
+       _, err = io.Copy(w, rdr)
+       if err != nil {
+               statusCode, statusText = http.StatusBadGateway, err.Error()
+       }
+}
diff --git a/services/keep-web/main.go b/services/keep-web/main.go
new file mode 100644 (file)
index 0000000..d780cc3
--- /dev/null
@@ -0,0 +1,28 @@
+package main
+
+import (
+       "flag"
+       "log"
+       "os"
+)
+
+// init installs a placeholder ARVADOS_API_TOKEN in the environment.
+func init() {
+       // MakeArvadosClient refuses to build a client unless this
+       // variable provides a default token, even when a different
+       // token is assigned before the client is ever used. Setting
+       // the dummy value here, during init, keeps it from
+       // clobbering the one used by "run test servers".
+       const placeholderToken = "xxx"
+       _ = os.Setenv("ARVADOS_API_TOKEN", placeholderToken)
+}
+
+func main() {
+       flag.Parse()
+       srv := &server{}
+       if err := srv.Start(); err != nil {
+               log.Fatal(err)
+       }
+       log.Println("Listening at", srv.Addr)
+       if err := srv.Wait(); err != nil {
+               log.Fatal(err)
+       }
+}
diff --git a/services/keep-web/server.go b/services/keep-web/server.go
new file mode 100644 (file)
index 0000000..44da00f
--- /dev/null
@@ -0,0 +1,27 @@
+package main
+
+import (
+       "flag"
+       "net/http"
+
+       "git.curoverse.com/arvados.git/sdk/go/httpserver"
+)
+
+// address is the "host:port" that Start hands to the server as its
+// listening address. It is populated by the -address flag.
+var address string
+
+// init registers the -address command-line flag.
+func init() {
+       const (
+               defaultAddress = "0.0.0.0:80"
+               usage          = "Address to listen on, \"host:port\"."
+       )
+       flag.StringVar(&address, "address", defaultAddress, usage)
+}
+
+// server is an httpserver.Server that routes every request to a
+// handler.
+type server struct {
+       httpserver.Server
+}
+
+// Start installs the request handler, sets the listening address
+// from the package-level address variable, and starts the embedded
+// server. It returns whatever error the embedded Start returns.
+func (srv *server) Start() error {
+       router := http.NewServeMux()
+       router.Handle("/", new(handler))
+
+       srv.Addr = address
+       srv.Handler = router
+       return srv.Server.Start()
+}
diff --git a/services/keep-web/server_test.go b/services/keep-web/server_test.go
new file mode 100644 (file)
index 0000000..1c36f98
--- /dev/null
@@ -0,0 +1,170 @@
+package main
+
+import (
+       "crypto/md5"
+       "fmt"
+       "os/exec"
+       "strings"
+       "testing"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+       "git.curoverse.com/arvados.git/sdk/go/arvadostest"
+       "git.curoverse.com/arvados.git/sdk/go/keepclient"
+       check "gopkg.in/check.v1"
+)
+
+// Register IntegrationSuite with gocheck.
+var _ = check.Suite(&IntegrationSuite{})
+
+// Tokens and collection UUIDs used by the tests below. Presumably
+// these match fixtures loaded into the test API server -- verify
+// against the fixture files when changing them.
+const (
+       spectatorToken  = "zw2f4gwx8hw8cjre7yp6v1zylhrhn3m5gvjq73rtpwhmknrybu"
+       activeToken     = "3kg6k6lzmp9kj5cpkcoxie963cmvjahbt2fod9zru30k1jqdmi"
+       anonymousToken  = "4kg6k6lzmp9kj4cpkcoxie964cmvjahbt4fod9zru44k4jqdmi"
+       fooCollection   = "zzzzz-4zz18-fy296fx3hot09f7"
+       bogusCollection = "zzzzz-4zz18-totallynotexist"
+       hwCollection    = "zzzzz-4zz18-4en62shvi99lxd4"
+)
+
+// IntegrationSuite tests need an API server and Keep servers (both
+// are started in SetUpSuite). testServer is the server under test;
+// SetUpTest starts a fresh one for every test.
+type IntegrationSuite struct {
+       testServer *server
+}
+
+// TestNoToken checks the responses given to a client that sends no
+// token, or a bogus one: 401 for the plain collection URL, 404 for
+// the download URL carrying the bogus token in its path, and 404 for
+// an unrouted path. The body is expected to be empty in every case.
+func (s *IntegrationSuite) TestNoToken(c *check.C) {
+       const (
+               unauthorized = `(?s)HTTP/1.1 401 Unauthorized\r\n.*`
+               notFound     = `(?s)HTTP/1.1 404 Not Found\r\n.*`
+       )
+       badTokens := []string{"", "bogustoken"}
+       for _, tok := range badTokens {
+               hdr, body := s.runCurl(c, tok, "/collections/"+fooCollection+"/foo")
+               c.Check(hdr, check.Matches, unauthorized)
+               c.Check(body, check.Equals, "")
+
+               // The download form needs a token in the path, so it
+               // is only exercised with the non-empty token.
+               if tok != "" {
+                       hdr, body = s.runCurl(c, tok, "/collections/download/"+fooCollection+"/"+tok+"/foo")
+                       c.Check(hdr, check.Matches, notFound)
+                       c.Check(body, check.Equals, "")
+               }
+
+               hdr, body = s.runCurl(c, tok, "/bad-route")
+               c.Check(hdr, check.Matches, notFound)
+               c.Check(body, check.Equals, "")
+       }
+}
+
+// TODO: Move most cases to functional tests -- at least use Go's own
+// http client instead of forking curl. Just leave enough of an
+// integration test to assure that the documented way of invoking curl
+// really works against the server.
+func (s *IntegrationSuite) Test404(c *check.C) {
+       for _, uri := range []string{
+               // Routing errors
+               "/",
+               "/foo",
+               "/download",
+               "/collections",
+               "/collections/",
+               "/collections/" + fooCollection,
+               "/collections/" + fooCollection + "/",
+               // Non-existent file in collection
+               "/collections/" + fooCollection + "/theperthcountyconspiracy",
+               "/collections/download/" + fooCollection + "/" + activeToken + "/theperthcountyconspiracy",
+               // Non-existent collection
+               "/collections/" + bogusCollection,
+               "/collections/" + bogusCollection + "/",
+               "/collections/" + bogusCollection + "/theperthcountyconspiracy",
+               "/collections/download/" + bogusCollection + "/" + activeToken + "/theperthcountyconspiracy",
+       } {
+               hdr, body := s.runCurl(c, activeToken, uri)
+               c.Check(hdr, check.Matches, "(?s)HTTP/1.1 404 Not Found\r\n.*")
+               c.Check(body, check.Equals, "")
+       }
+}
+
+// Test200 stores two blocks in Keep, then requests files through
+// both URL forms with various tokens and checks that each response
+// is "200 OK" with a body whose MD5 matches the expected digest.
+//
+// A "501 Not Implemented" response with an empty body is logged and
+// skipped rather than treated as a failure.
+func (s *IntegrationSuite) Test200(c *check.C) {
+       // anonymousTokens is package-level state shared with the
+       // handler; put it back afterwards so the setting does not
+       // leak into other tests.
+       savedTokens := anonymousTokens
+       defer func() { anonymousTokens = savedTokens }()
+       anonymousTokens = []string{anonymousToken}
+
+       arv, err := arvadosclient.MakeArvadosClient()
+       c.Assert(err, check.Equals, nil)
+       arv.ApiToken = activeToken
+       kc, err := keepclient.MakeKeepClient(&arv)
+       c.Assert(err, check.Equals, nil)
+       for _, data := range []string{"Hello world\n", "foo"} {
+               // Report a failed write instead of silently carrying
+               // on without the block.
+               _, _, err = kc.PutB([]byte(data))
+               c.Check(err, check.Equals, nil)
+       }
+
+       const (
+               fooMD5 = "acbd18db4cc2f85cedef654fccc4a4d8"
+               hwMD5  = "f0ef7081e1539ac00ef5b761b4fb01b3"
+       )
+       fooDownload := "/collections/download/" + fooCollection + "/" + activeToken + "/foo"
+       hwPath := "/collections/" + hwCollection + "/Hello%20world.txt"
+       for _, spec := range []struct {
+               token string // sent in the Authorization header; "" sends none
+               uri   string
+               md5   string // expected hex MD5 of the response body
+       }{
+               // My collection
+               {activeToken, "/collections/" + fooCollection + "/foo", fooMD5},
+               {"", fooDownload, fooMD5},
+               {"tokensobogus", fooDownload, fooMD5},
+               {activeToken, fooDownload, fooMD5},
+               {anonymousToken, fooDownload, fooMD5},
+               // Anonymously accessible user agreement. These should
+               // start working when CollectionFileReader provides
+               // real data instead of fake/stub data.
+               {"", hwPath, hwMD5},
+               {activeToken, hwPath, hwMD5},
+               {spectatorToken, hwPath, hwMD5},
+               {spectatorToken, "/collections/download/" + hwCollection + "/" + spectatorToken + "/Hello%20world.txt", hwMD5},
+       } {
+               hdr, body := s.runCurl(c, spec.token, spec.uri)
+               if strings.HasPrefix(hdr, "HTTP/1.1 501 Not Implemented\r\n") && body == "" {
+                       c.Log("Not implemented!")
+                       continue
+               }
+               c.Check(hdr, check.Matches, `(?s)HTTP/1.1 200 OK\r\n.*`)
+               c.Check(fmt.Sprintf("%x", md5.Sum([]byte(body))), check.Equals, spec.md5)
+       }
+}
+
+// Return header block and body.
+func (s *IntegrationSuite) runCurl(c *check.C, token, uri string, args ...string) (hdr, body string) {
+       curlArgs := []string{"--silent", "--show-error", "--include"}
+       if token != "" {
+               curlArgs = append(curlArgs, "-H", "Authorization: OAuth2 "+token)
+       }
+       curlArgs = append(curlArgs, args...)
+       curlArgs = append(curlArgs, "http://"+s.testServer.Addr+uri)
+       c.Log(fmt.Sprintf("curlArgs == %#v", curlArgs))
+       output, err := exec.Command("curl", curlArgs...).CombinedOutput()
+       // Without "-f", curl exits 0 as long as it gets a valid HTTP
+       // response from the server, even if the response status
+       // indicates that the request failed. In our test suite, we
+       // always expect a valid HTTP response, and we parse the
+       // headers ourselves. If curl exits non-zero, our testing
+       // environment is broken.
+       c.Assert(err, check.Equals, nil)
+       hdrsAndBody := strings.SplitN(string(output), "\r\n\r\n", 2)
+       c.Assert(len(hdrsAndBody), check.Equals, 2)
+       hdr = hdrsAndBody[0]
+       body = hdrsAndBody[1]
+       return
+}
+
+// SetUpSuite starts the API server and then Keep, using the
+// arvadostest helpers, before any test in the suite runs.
+func (s *IntegrationSuite) SetUpSuite(c *check.C) {
+       arvadostest.StartAPI()
+       arvadostest.StartKeep()
+}
+
+// TearDownSuite stops the services started by SetUpSuite, in the
+// reverse order: Keep first, then the API server.
+func (s *IntegrationSuite) TearDownSuite(c *check.C) {
+       arvadostest.StopKeep()
+       arvadostest.StopAPI()
+}
+
+func (s *IntegrationSuite) SetUpTest(c *check.C) {
+       arvadostest.ResetEnv()
+       s.testServer = &server{}
+       var err error
+       address = "127.0.0.1:0"
+       err = s.testServer.Start()
+       c.Assert(err, check.Equals, nil)
+}
+
+// TearDownTest closes the server started by SetUpTest, if there is
+// one, and records a test failure if closing it returns an error.
+func (s *IntegrationSuite) TearDownTest(c *check.C) {
+       var closeErr error
+       if srv := s.testServer; srv != nil {
+               closeErr = srv.Close()
+       }
+       c.Check(closeErr, check.Equals, nil)
+}
+
+// Gocheck boilerplate: Test is the entry point that "go test"
+// discovers; it hands control to gocheck via check.TestingT.
+func Test(t *testing.T) {
+       check.TestingT(t)
+}