5416: arv-git-httpd provides HTTP access to git repositories, using Arvados token...
authorTom Clegg <tom@curoverse.com>
Fri, 13 Mar 2015 22:04:53 +0000 (18:04 -0400)
committerTom Clegg <tom@curoverse.com>
Fri, 13 Mar 2015 22:04:53 +0000 (18:04 -0400)
services/arv-git-httpd/.gitignore [new file with mode: 0644]
services/arv-git-httpd/auth_handler.go [new file with mode: 0644]
services/arv-git-httpd/doc.go [new file with mode: 0644]
services/arv-git-httpd/main.go [new file with mode: 0644]
services/arv-git-httpd/server.go [new file with mode: 0644]

diff --git a/services/arv-git-httpd/.gitignore b/services/arv-git-httpd/.gitignore
new file mode 100644 (file)
index 0000000..1ae1045
--- /dev/null
@@ -0,0 +1 @@
+arv-git-httpd
diff --git a/services/arv-git-httpd/auth_handler.go b/services/arv-git-httpd/auth_handler.go
new file mode 100644 (file)
index 0000000..f182bca
--- /dev/null
@@ -0,0 +1,166 @@
+package main
+
+import (
+       "log"
+       "net/http"
+       "net/http/cgi"
+       "os"
+       "strings"
+       "sync"
+       "time"
+
+       "git.curoverse.com/arvados.git/sdk/go/arvadosclient"
+)
+
+// newArvadosClient is the constructor used by connectionPool. It
+// returns a pointer to a newly made Arvados client, or nil (after
+// logging the error) if the client could not be created.
+func newArvadosClient() interface{} {
+       // A placeholder token is exported first because
+       // MakeArvadosClient returns an error when no token is set,
+       // even though nothing here requires authentication yet.
+       os.Setenv("ARVADOS_API_TOKEN", "xxx")
+       client, err := arvadosclient.MakeArvadosClient()
+       if err == nil {
+               return &client
+       }
+       log.Println("MakeArvadosClient:", err)
+       return nil
+}
+
+// connectionPool holds reusable Arvados API clients; newArvadosClient
+// builds a new one whenever the pool has none to hand out.
+var connectionPool = &sync.Pool{New: newArvadosClient}
+
+// spyingResponseWriter wraps an http.ResponseWriter and records the
+// status code of the response in *wroteStatus, so the caller can
+// tell afterwards whether (and with which status) a response was
+// started. *wroteStatus stays 0 until a response has been started.
+type spyingResponseWriter struct {
+       http.ResponseWriter
+       wroteStatus *int
+}
+
+// WriteHeader records the status code s, then passes it on to the
+// wrapped ResponseWriter.
+func (w spyingResponseWriter) WriteHeader(s int) {
+       *w.wroteStatus = s
+       w.ResponseWriter.WriteHeader(s)
+}
+
+// Write passes b on to the wrapped ResponseWriter. A Write that is
+// not preceded by WriteHeader makes net/http send an implicit 200
+// OK, so that status is recorded here; otherwise such a response
+// would look as if it had never been started.
+func (w spyingResponseWriter) Write(b []byte) (int, error) {
+       if *w.wroteStatus == 0 {
+               *w.wroteStatus = http.StatusOK
+       }
+       return w.ResponseWriter.Write(b)
+}
+
+// authHandler is an http.Handler that checks each request (see
+// ServeHTTP) before passing it to a copy of the wrapped CGI handler.
+type authHandler struct {
+       handler *cgi.Handler
+}
+
+// ServeHTTP authenticates and authorizes a git-over-HTTP request,
+// then hands it to a copy of the wrapped CGI handler.
+//
+// The HTTP basic-auth password is used as an Arvados API token (the
+// username is logged but otherwise unused). The repository name is
+// the part of the URL path before ".git/". The request is served
+// only if a matching directory exists under theConfig.Root and the
+// API server lists exactly one repository with that name for the
+// token. For pushes (paths ending in "/git-receive-pack") the token
+// must additionally be able to update the repository record.
+//
+// If no response has been started by the time ServeHTTP returns,
+// statusCode and statusText are sent as the response. Every request
+// is logged; the token itself is never written to the log.
+func (h *authHandler) ServeHTTP(wOrig http.ResponseWriter, r *http.Request) {
+       var statusCode int
+       var statusText string
+       var username, password string
+       var repoName string
+       var wroteStatus int
+
+       w := spyingResponseWriter{wOrig, &wroteStatus}
+
+       defer func() {
+               if wroteStatus == 0 {
+                       // Nobody has called WriteHeader yet: that must be our job.
+                       w.WriteHeader(statusCode)
+                       w.Write([]byte(statusText))
+               }
+               // The password is an API token, so it must not end up
+               // in the log: record only whether one was supplied.
+               passwordToLog := ""
+               if password != "" {
+                       passwordToLog = "<redacted>"
+               }
+               log.Println(quoteStrings(r.RemoteAddr, username, passwordToLog, wroteStatus, statusText, repoName, r.URL.Path)...)
+       }()
+
+       // HTTP request username is logged, but unused. Password is an
+       // Arvados API token.
+       username, password, ok := r.BasicAuth()
+       if !ok || username == "" || password == "" {
+               statusCode, statusText = http.StatusUnauthorized, "no credentials provided"
+               w.Header().Add("WWW-Authenticate", "basic")
+               return
+       }
+
+       // Access to paths "/foo/bar.git/*" and "/foo/bar/.git/*" is
+       // protected by the permissions on the repository named
+       // "foo/bar". TrimPrefix (rather than slicing off the first
+       // byte) keeps an empty path from causing a panic.
+       pathParts := strings.SplitN(strings.TrimPrefix(r.URL.Path, "/"), ".git/", 2)
+       if len(pathParts) != 2 {
+               statusCode, statusText = http.StatusBadRequest, "bad request"
+               return
+       }
+       repoName = strings.TrimRight(pathParts[0], "/")
+
+       // Regardless of whether the client asked for "/foo.git" or
+       // "/foo/.git", we choose whichever variant exists in our repo
+       // root. If neither exists, we won't even bother checking
+       // authentication.
+       rewrittenPath := ""
+       tryDirs := []string{
+               "/" + repoName + ".git",
+               "/" + repoName + "/.git",
+       }
+       for _, dir := range tryDirs {
+               if fileInfo, err := os.Stat(theConfig.Root + dir); err != nil {
+                       if !os.IsNotExist(err) {
+                               statusCode, statusText = http.StatusInternalServerError, err.Error()
+                               return
+                       }
+               } else if fileInfo.IsDir() {
+                       rewrittenPath = dir + "/" + pathParts[1]
+                       break
+               }
+       }
+       if rewrittenPath == "" {
+               statusCode, statusText = http.StatusNotFound, "not found"
+               return
+       }
+       r.URL.Path = rewrittenPath
+
+       arv, ok := connectionPool.Get().(*arvadosclient.ArvadosClient)
+       if !ok || arv == nil {
+               statusCode, statusText = http.StatusInternalServerError, "connection pool failed"
+               return
+       }
+       defer connectionPool.Put(arv)
+
+       // Ask API server whether the repository is readable using this token (by trying to read it!)
+       arv.ApiToken = password
+       reposFound := arvadosclient.Dict{}
+       if err := arv.List("repositories", arvadosclient.Dict{
+               "filters": [][]string{{"name", "=", repoName}},
+       }, &reposFound); err != nil {
+               statusCode, statusText = http.StatusInternalServerError, err.Error()
+               return
+       }
+       avail, ok := reposFound["items_available"].(float64)
+       switch {
+       case !ok:
+               statusCode, statusText = http.StatusInternalServerError, "bad list response from API"
+               return
+       case avail < 1:
+               statusCode, statusText = http.StatusNotFound, "not found"
+               return
+       case avail > 1:
+               statusCode, statusText = http.StatusInternalServerError, "name collision"
+               return
+       }
+
+       if strings.HasSuffix(r.URL.Path, "/git-receive-pack") {
+               // Every step of decoding the list response is checked,
+               // so an unexpected response produces an error reply
+               // instead of a panic.
+               uuid := ""
+               if items, ok := reposFound["items"].([]interface{}); ok && len(items) > 0 {
+                       if repo, ok := items[0].(map[string]interface{}); ok {
+                               uuid, _ = repo["uuid"].(string)
+                       }
+               }
+               if uuid == "" {
+                       statusCode, statusText = http.StatusInternalServerError, "bad list response from API"
+                       return
+               }
+               // Ask API server whether the repository is writable
+               // using this token (by trying to update it!)
+               err := arv.Update("repositories", uuid, arvadosclient.Dict{
+                       "repository": arvadosclient.Dict{
+                               "modified_at": time.Now().String(),
+                       },
+               }, &arvadosclient.Dict{})
+               if err != nil {
+                       statusCode, statusText = http.StatusForbidden, err.Error()
+                       return
+               }
+               statusText = "write"
+       } else {
+               statusText = "read"
+       }
+
+       // Give the handler copy its own environment slice: appending
+       // to the shared one could write into a backing array that
+       // concurrent requests use as well.
+       handlerCopy := *h.handler
+       env := make([]string, 0, len(h.handler.Env)+1)
+       env = append(env, h.handler.Env...)
+       handlerCopy.Env = append(env, "REMOTE_USER="+r.RemoteAddr) // Should be username
+       handlerCopy.ServeHTTP(&w, r)
+}
+
+// escaper backslash-escapes double quotes, backslashes, newlines and
+// carriage returns, so an escaped value can neither end its
+// surrounding quotes nor start or overwrite a line in the log.
+var escaper = strings.NewReplacer("\"", "\\\"", "\\", "\\\\", "\n", "\\n", "\r", "\\r")
+
+// quoteStrings transforms strings so they are safer to write in logs
+// (e.g., 'foo"bar' becomes '"foo\"bar"'). Non-string args are left
+// alone. The result is a new slice: the slice passed in by the
+// caller is not modified.
+func quoteStrings(args ...interface{}) []interface{} {
+       quoted := make([]interface{}, len(args))
+       for i, arg := range args {
+               if s, ok := arg.(string); ok {
+                       quoted[i] = "\"" + escaper.Replace(s) + "\""
+               } else {
+                       quoted[i] = arg
+               }
+       }
+       return quoted
+}
diff --git a/services/arv-git-httpd/doc.go b/services/arv-git-httpd/doc.go
new file mode 100644 (file)
index 0000000..21e8e48
--- /dev/null
@@ -0,0 +1,7 @@
+/*
+arv-git-httpd provides authenticated access to Arvados-hosted git repositories.
+
+Example:
+       arv-git-httpd -address=:8000 -repo-root=/var/lib/arvados/git
+*/
+package main
diff --git a/services/arv-git-httpd/main.go b/services/arv-git-httpd/main.go
new file mode 100644 (file)
index 0000000..47758f6
--- /dev/null
@@ -0,0 +1,41 @@
+package main
+
+import (
+       "flag"
+       "log"
+       "os"
+)
+
+// config holds the settings that init() binds to command line flags.
+type config struct {
+       Addr       string // address to listen on, "host:port" (-address)
+       GitCommand string // path to the git executable (-git-command)
+       Root       string // path to the git repositories (-repo-root)
+}
+
+// theConfig is the process-wide configuration; init() allocates it
+// and registers the command line flags that fill it in.
+var theConfig *config
+
+// init allocates theConfig and registers one command line flag per
+// setting. The current working directory is the default repository
+// root, so failing to determine it is fatal.
+func init() {
+       workDir, err := os.Getwd()
+       if err != nil {
+               log.Fatalln("Getwd():", err)
+       }
+
+       cfg := &config{}
+       flag.StringVar(&cfg.Addr, "address", "0.0.0.0:80",
+               "Address to listen on, \"host:port\".")
+       flag.StringVar(&cfg.GitCommand, "git-command", "/usr/bin/git",
+               "Path to git executable. Each authenticated request will execute this program with a single argument, \"http-backend\".")
+       flag.StringVar(&cfg.Root, "repo-root", workDir,
+               "Path to git repositories.")
+       theConfig = cfg
+}
+
+// main parses the command line, starts the server, logs the address
+// it is listening on, and then blocks until the server shuts down.
+// An error from starting or from waiting is fatal.
+func main() {
+       flag.Parse()
+
+       daemon := &server{}
+       err := daemon.Start()
+       if err != nil {
+               log.Fatal(err)
+       }
+       log.Println("Listening at", daemon.Addr)
+
+       err = daemon.Wait()
+       if err != nil {
+               log.Fatal(err)
+       }
+}
diff --git a/services/arv-git-httpd/server.go b/services/arv-git-httpd/server.go
new file mode 100644 (file)
index 0000000..393b6c4
--- /dev/null
@@ -0,0 +1,101 @@
+package main
+
+import (
+       "net"
+       "net/http"
+       "net/http/cgi"
+       "sync"
+       "time"
+)
+
+// server is an http.Server that records the address it actually
+// listens on and can be shut down without killing the process (see
+// Start, Wait and Close).
+type server struct {
+       http.Server
+       Addr     string           // host:port where the server is listening.
+       err      error            // error returned by Serve, kept unless wantDown was set; returned by Wait
+       cond     *sync.Cond       // used to announce that done has become true
+       done     bool             // set once Serve has returned
+       listener *net.TCPListener // listening socket opened by Start
+       wantDown bool             // set by Close before it closes the listener
+}
+
+// Start opens a TCP listener on theConfig.Addr, records the address
+// actually bound in srv.Addr, and serves requests in a background
+// goroutine. It returns an error if the address cannot be resolved
+// or listened on; the outcome of serving is reported by Wait.
+func (srv *server) Start() error {
+       backend := &cgi.Handler{
+               Path: theConfig.GitCommand,
+               Dir:  theConfig.Root,
+               Env: []string{
+                       "GIT_PROJECT_ROOT=" + theConfig.Root,
+                       "GIT_HTTP_EXPORT_ALL=",
+               },
+               InheritEnv: []string{"PATH"},
+               Args:       []string{"http-backend"},
+       }
+
+       // What follows is essentially http.ListenAndServe() with two
+       // additions. First, the caller of Start() can find out which
+       // address:port was bound, which makes listening on ":0"
+       // useful in test suites. Second, the server can be shut down
+       // without killing the process, which is useful in test cases
+       // and allows a graceful shutdown on SIGTERM that leaves
+       // active connections alone.
+
+       tcpAddr, err := net.ResolveTCPAddr("tcp", theConfig.Addr)
+       if err != nil {
+               return err
+       }
+       srv.listener, err = net.ListenTCP("tcp", tcpAddr)
+       if err != nil {
+               return err
+       }
+       srv.Addr = srv.listener.Addr().String()
+
+       router := http.NewServeMux()
+       router.Handle("/", &authHandler{backend})
+       srv.Handler = router
+
+       // Waiters take the read side of stateLock through srv.cond;
+       // the serving goroutine takes the write side to publish done.
+       stateLock := &sync.RWMutex{}
+       srv.cond = sync.NewCond(stateLock.RLocker())
+       go func() {
+               serveErr := srv.Serve(tcpKeepAliveListener{srv.listener})
+               if !srv.wantDown {
+                       srv.err = serveErr
+               }
+               stateLock.Lock()
+               defer stateLock.Unlock()
+               srv.done = true
+               srv.cond.Broadcast()
+       }()
+       return nil
+}
+
+// Wait blocks until the serving goroutine started by Start has
+// finished, then returns the error recorded for it (nil if none was
+// recorded).
+func (srv *server) Wait() error {
+       locker := srv.cond.L
+       locker.Lock()
+       defer locker.Unlock()
+       for {
+               if srv.done {
+                       return srv.err
+               }
+               srv.cond.Wait()
+       }
+}
+
+// Close shuts down the server and returns when it has stopped. The
+// return value is whatever Wait returns.
+func (srv *server) Close() error {
+       // Set wantDown before closing the listener, so that the error
+       // Serve returns because of the closed listener is not
+       // recorded as a failure.
+       // NOTE(review): wantDown is written here and read by the
+       // serving goroutine without holding a lock -- confirm this is
+       // acceptable under the race detector.
+       srv.wantDown = true
+       srv.listener.Close()
+       return srv.Wait()
+}
+
+// tcpKeepAliveListener wraps a *net.TCPListener so that every
+// accepted connection has TCP keep-alive enabled. It is copied from
+// net/http, which does not export it.
+type tcpKeepAliveListener struct {
+       *net.TCPListener
+}
+
+// Accept waits for the next connection, enables keep-alive on it
+// with a period of three minutes, and returns it. If accepting
+// fails, the connection is nil and the error is returned as is.
+func (ln tcpKeepAliveListener) Accept() (c net.Conn, err error) {
+       conn, acceptErr := ln.AcceptTCP()
+       if acceptErr != nil {
+               return nil, acceptErr
+       }
+       conn.SetKeepAlive(true)
+       conn.SetKeepAlivePeriod(3 * time.Minute)
+       return conn, nil
+}