Merge branch '16212-pam-login'
author Tom Clegg <tom@tomclegg.ca>
Wed, 8 Apr 2020 14:11:23 +0000 (10:11 -0400)
committer Tom Clegg <tom@tomclegg.ca>
Wed, 8 Apr 2020 14:11:23 +0000 (10:11 -0400)
refs #16212

Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@tomclegg.ca>

48 files changed:
build/run-build-packages.sh
build/run-tests.sh
cmd/arvados-server/arvados-ws.service [moved from services/ws/arvados-ws.service with 94% similarity]
cmd/arvados-server/cmd.go
doc/admin/metrics.html.textile.liquid
lib/boot/cmd.go
lib/boot/nginx.go
lib/boot/postgresql.go
lib/boot/supervisor.go
lib/controller/handler.go
lib/dispatchcloud/dispatcher.go
lib/install/arvadostest_docker_build.sh [new file with mode: 0755]
lib/install/arvadostest_docker_run.sh [new file with mode: 0755]
lib/install/deps.go [new file with mode: 0644]
lib/install/deps_test.go [new file with mode: 0644]
lib/install/example_from_scratch.sh [new file with mode: 0644]
lib/service/cmd.go
lib/service/cmd_test.go
lib/service/error.go
sdk/go/arvados/config.go
sdk/go/arvados/config_test.go
sdk/go/health/aggregator.go
sdk/go/httpserver/logger.go
sdk/java-v2/build.gradle
sdk/python/tests/run_test_server.py
sdk/python/tests/test_keep_client.py
services/keep-balance/server.go
services/keepstore/command.go
services/keepstore/unix_volume_test.go
services/ws/doc.go
services/ws/event.go
services/ws/event_source.go
services/ws/event_source_test.go
services/ws/event_test.go
services/ws/gocheck_test.go
services/ws/handler.go
services/ws/main.go [deleted file]
services/ws/permission.go
services/ws/permission_test.go
services/ws/router.go
services/ws/server.go [deleted file]
services/ws/service.go [new file with mode: 0644]
services/ws/service_test.go [moved from services/ws/server_test.go with 55% similarity]
services/ws/session.go
services/ws/session_v0.go
services/ws/session_v0_test.go
services/ws/session_v1.go
tools/arvbox/lib/arvbox/docker/service/websockets/run-service

index 4faa1c6b0d4b0e83d12d27b997615fbf78031284..3ba1dcc05e8776fc57a205e2deb79a0224a8e370 100755 (executable)
@@ -308,7 +308,7 @@ package_go_binary services/keepstore keepstore \
     "Keep storage daemon, accessible to clients on the LAN"
 package_go_binary services/keep-web keep-web \
     "Static web hosting service for user data stored in Arvados Keep"
-package_go_binary services/ws arvados-ws \
+package_go_binary cmd/arvados-server arvados-ws \
     "Arvados Websocket server"
 package_go_binary tools/sync-groups arvados-sync-groups \
     "Synchronize remote groups into Arvados from an external source"
index 72a7620ba7a0fab018f1785adc7464538db7c87a..0212d1bc0e13e7b6202a04f4da00436a6c278ed1 100755 (executable)
@@ -35,7 +35,7 @@ Options:
 --short        Skip (or scale down) some slow tests.
 --interactive  Set up, then prompt for test/install steps to perform.
 WORKSPACE=path Arvados source tree to test.
-CONFIGSRC=path Dir with config.yml file containing PostgreSQL section for use by tests. (required)
+CONFIGSRC=path Dir with config.yml file containing PostgreSQL section for use by tests.
 services/api_test="TEST=test/functional/arvados/v1/collections_controller_test.rb"
                Restrict apiserver tests to the given file
 sdk/python_test="--test-suite tests.test_keep_locator"
@@ -197,10 +197,8 @@ sanity_checks() {
     [[ -n "${skip[sanity]}" ]] && return 0
     ( [[ -n "$WORKSPACE" ]] && [[ -d "$WORKSPACE/services" ]] ) \
         || fatal "WORKSPACE environment variable not set to a source directory (see: $0 --help)"
-    [[ -n "$CONFIGSRC" ]] \
-       || fatal "CONFIGSRC environment not set (see: $0 --help)"
-    [[ -s "$CONFIGSRC/config.yml" ]] \
-       || fatal "'$CONFIGSRC/config.yml' is empty or not found (see: $0 --help)"
+    [[ -z "$CONFIGSRC" ]] || [[ -s "$CONFIGSRC/config.yml" ]] \
+       || fatal "CONFIGSRC is $CONFIGSRC but '$CONFIGSRC/config.yml' is empty or not found (see: $0 --help)"
     echo Checking dependencies:
     echo "locale: ${LANG}"
     [[ "$(locale charmap)" = "UTF-8" ]] \
@@ -556,8 +554,14 @@ setup_ruby_environment() {
         bundle="$(gem env gempath | cut -f1 -d:)/bin/bundle"
         (
             export HOME=$GEMHOME
-            ("$bundle" version | grep -q 2.0.2) \
-                || gem install --user bundler -v 2.0.2
+            bundlers="$(gem list --details bundler)"
+            versions=(1.11.0 1.17.3 2.0.2)
+            for v in ${versions[@]}; do
+                if ! echo "$bundlers" | fgrep -q "($v)"; then
+                    gem install --user $(for v in ${versions[@]}; do echo bundler:${v}; done)
+                    break
+                fi
+            done
             "$bundle" version | tee /dev/stderr | grep -q 'version 2'
         ) || fatal 'install bundler'
     fi
@@ -593,6 +597,11 @@ setup_virtualenv() {
 }
 
 initialize() {
+    # If dependencies like ruby, go, etc. are installed in
+    # /var/lib/arvados -- presumably by "arvados-server install" --
+    # then we want to use those versions, instead of whatever happens
+    # to be installed in /usr.
+    PATH="/var/lib/arvados/bin:${PATH}"
     sanity_checks
 
     echo "WORKSPACE=$WORKSPACE"
@@ -1059,7 +1068,7 @@ test_sdk/cli() {
 }
 
 test_sdk/java-v2() {
-    cd "$WORKSPACE/sdk/java-v2" && gradle test
+    cd "$WORKSPACE/sdk/java-v2" && gradle test ${testargs[sdk/java-v2]}
 }
 
 test_services/login-sync() {
similarity index 94%
rename from services/ws/arvados-ws.service
rename to cmd/arvados-server/arvados-ws.service
index 36624c78779c02cfde829323551ca9c2cb19eda3..aebc56a79f333b19f061f5f0aadce793e799529c 100644 (file)
@@ -6,6 +6,7 @@
 Description=Arvados websocket server
 Documentation=https://doc.arvados.org/
 After=network.target
+AssertPathExists=/etc/arvados/config.yml
 
 # systemd==229 (ubuntu:xenial) obeys StartLimitInterval in the [Unit] section
 StartLimitInterval=0
index a9d927d8734401f76fa173bff7214e0038fc4c68..fcea2223da70d5a174ee74b8281ebd3d20e0b503 100644 (file)
@@ -14,6 +14,8 @@ import (
        "git.arvados.org/arvados.git/lib/controller"
        "git.arvados.org/arvados.git/lib/crunchrun"
        "git.arvados.org/arvados.git/lib/dispatchcloud"
+       "git.arvados.org/arvados.git/lib/install"
+       "git.arvados.org/arvados.git/services/ws"
 )
 
 var (
@@ -25,11 +27,13 @@ var (
                "boot":            boot.Command,
                "cloudtest":       cloudtest.Command,
                "config-check":    config.CheckCommand,
-               "config-dump":     config.DumpCommand,
                "config-defaults": config.DumpDefaultsCommand,
+               "config-dump":     config.DumpCommand,
                "controller":      controller.Command,
                "crunch-run":      crunchrun.Command,
                "dispatch-cloud":  dispatchcloud.Command,
+               "install":         install.Command,
+               "ws":              ws.Command,
        })
 )
 
index 9616d4add43a44105d78fbf5ff6f4ae9b8e1c3cd..a6a0862c4f1d1383a44a80832b42cebaafd7f569 100644 (file)
@@ -36,7 +36,7 @@ table(table table-bordered table-condensed table-hover).
 |arvados-dispatch-cloud|✓|
 |arvados-git-httpd||
 |arvados-node-manager||
-|arvados-ws||
+|arvados-ws|✓|
 |composer||
 |keepproxy||
 |keepstore|✓|
index 1abc93722d8b872cce0d524bf55f5277e6487a5c..5147e3ac33bb65ea8dc0305f986b30a69d736785 100644 (file)
@@ -6,9 +6,11 @@ package boot
 
 import (
        "context"
+       "errors"
        "flag"
        "fmt"
        "io"
+       "time"
 
        "git.arvados.org/arvados.git/lib/cmd"
        "git.arvados.org/arvados.git/lib/config"
@@ -56,6 +58,8 @@ func (bootCommand) RunCommand(prog string, args []string, stdin io.Reader, stdou
        flags.StringVar(&super.ListenHost, "listen-host", "localhost", "host name or interface address for service listeners")
        flags.StringVar(&super.ControllerAddr, "controller-address", ":0", "desired controller address, `host:port` or `:port`")
        flags.BoolVar(&super.OwnTemporaryDatabase, "own-temporary-database", false, "bring up a postgres server and create a temporary database")
+       timeout := flags.Duration("timeout", 0, "maximum time to wait for cluster to be ready")
+       shutdown := flags.Bool("shutdown", false, "shut down when the cluster becomes ready")
        err = flags.Parse(args)
        if err == flag.ErrHelp {
                err = nil
@@ -77,14 +81,27 @@ func (bootCommand) RunCommand(prog string, args []string, stdin io.Reader, stdou
 
        super.Start(ctx, cfg)
        defer super.Stop()
+
+       var timer *time.Timer
+       if *timeout > 0 {
+               timer = time.AfterFunc(*timeout, super.Stop)
+       }
+
        url, ok := super.WaitReady()
-       if !ok {
+       if timer != nil && !timer.Stop() {
+               err = errors.New("boot timed out")
+               return 1
+       } else if !ok {
+               err = errors.New("boot failed")
                return 1
        }
        // Write controller URL to stdout. Nothing else goes to
        // stdout, so this provides an easy way for a calling script
        // to discover the controller URL when everything is ready.
        fmt.Fprintln(stdout, url)
+       if *shutdown {
+               super.Stop()
+       }
        // Wait for signal/crash + orderly shutdown
        <-super.done
        return 0
index 6b2d6777fdf38053e02eb2a676e2eb3bd85ec241..ecbb7a9d3a40f9cfb916f7c89ff3f5841a38ac23 100644 (file)
@@ -26,15 +26,18 @@ func (runNginx) String() string {
 }
 
 func (runNginx) Run(ctx context.Context, fail func(error), super *Supervisor) error {
+       err := super.wait(ctx, createCertificates{})
+       if err != nil {
+               return err
+       }
        vars := map[string]string{
                "LISTENHOST": super.ListenHost,
-               "SSLCERT":    filepath.Join(super.SourcePath, "services", "api", "tmp", "self-signed.pem"), // TODO: root ca
-               "SSLKEY":     filepath.Join(super.SourcePath, "services", "api", "tmp", "self-signed.key"), // TODO: root ca
+               "SSLCERT":    filepath.Join(super.tempdir, "server.crt"),
+               "SSLKEY":     filepath.Join(super.tempdir, "server.key"),
                "ACCESSLOG":  filepath.Join(super.tempdir, "nginx_access.log"),
                "ERRORLOG":   filepath.Join(super.tempdir, "nginx_error.log"),
                "TMPDIR":     super.tempdir,
        }
-       var err error
        for _, cmpt := range []struct {
                varname string
                svc     arvados.Service
index df98904151834a8b22dfbfc429f7e9882ad6e7ae..34ccf04a88dbd68a7822cc75b13da972e32844ee 100644 (file)
@@ -11,7 +11,9 @@ import (
        "fmt"
        "os"
        "os/exec"
+       "os/user"
        "path/filepath"
+       "strconv"
        "strings"
        "time"
 
@@ -34,6 +36,13 @@ func (runPostgreSQL) Run(ctx context.Context, fail func(error), super *Superviso
                return err
        }
 
+       iamroot := false
+       if u, err := user.Current(); err != nil {
+               return fmt.Errorf("user.Current(): %s", err)
+       } else if u.Uid == "0" {
+               iamroot = true
+       }
+
        buf := bytes.NewBuffer(nil)
        err = super.RunProgram(ctx, super.tempdir, buf, nil, "pg_config", "--bindir")
        if err != nil {
@@ -42,11 +51,45 @@ func (runPostgreSQL) Run(ctx context.Context, fail func(error), super *Superviso
        bindir := strings.TrimSpace(buf.String())
 
        datadir := filepath.Join(super.tempdir, "pgdata")
-       err = os.Mkdir(datadir, 0755)
+       err = os.Mkdir(datadir, 0700)
        if err != nil {
                return err
        }
-       err = super.RunProgram(ctx, super.tempdir, nil, nil, filepath.Join(bindir, "initdb"), "-D", datadir)
+       prog, args := filepath.Join(bindir, "initdb"), []string{"-D", datadir, "-E", "utf8"}
+       if iamroot {
+               postgresUser, err := user.Lookup("postgres")
+               if err != nil {
+                       return fmt.Errorf("user.Lookup(\"postgres\"): %s", err)
+               }
+               postgresUid, err := strconv.Atoi(postgresUser.Uid)
+               if err != nil {
+                       return fmt.Errorf("user.Lookup(\"postgres\"): non-numeric uid?: %q", postgresUser.Uid)
+               }
+               postgresGid, err := strconv.Atoi(postgresUser.Gid)
+               if err != nil {
+                       return fmt.Errorf("user.Lookup(\"postgres\"): non-numeric gid?: %q", postgresUser.Gid)
+               }
+               err = os.Chown(super.tempdir, 0, postgresGid)
+               if err != nil {
+                       return err
+               }
+               err = os.Chmod(super.tempdir, 0710)
+               if err != nil {
+                       return err
+               }
+               err = os.Chown(datadir, postgresUid, 0)
+               if err != nil {
+                       return err
+               }
+               // We can't use "sudo -u" here because it creates an
+               // intermediate process that interferes with our
+               // ability to reliably kill postgres. The setuidgid
+               // program just calls exec without forking, so it
+               // doesn't have this problem.
+               args = append([]string{"postgres", prog}, args...)
+               prog = "setuidgid"
+       }
+       err = super.RunProgram(ctx, super.tempdir, nil, nil, prog, args...)
        if err != nil {
                return err
        }
@@ -55,18 +98,29 @@ func (runPostgreSQL) Run(ctx context.Context, fail func(error), super *Superviso
        if err != nil {
                return err
        }
+       if iamroot {
+               err = super.RunProgram(ctx, super.tempdir, nil, nil, "chown", "postgres", datadir+"/server.crt", datadir+"/server.key")
+               if err != nil {
+                       return err
+               }
+       }
 
        port := super.cluster.PostgreSQL.Connection["port"]
 
        super.waitShutdown.Add(1)
        go func() {
                defer super.waitShutdown.Done()
-               fail(super.RunProgram(ctx, super.tempdir, nil, nil, filepath.Join(bindir, "postgres"),
+               prog, args := filepath.Join(bindir, "postgres"), []string{
                        "-l",          // enable ssl
                        "-D", datadir, // data dir
                        "-k", datadir, // socket dir
                        "-p", super.cluster.PostgreSQL.Connection["port"],
-               ))
+               }
+               if iamroot {
+                       args = append([]string{"postgres", prog}, args...)
+                       prog = "setuidgid"
+               }
+               fail(super.RunProgram(ctx, super.tempdir, nil, nil, prog, args...))
        }()
 
        for {
@@ -78,11 +132,15 @@ func (runPostgreSQL) Run(ctx context.Context, fail func(error), super *Superviso
                }
                time.Sleep(time.Second / 2)
        }
-       db, err := sql.Open("postgres", arvados.PostgreSQLConnection{
+       pgconn := arvados.PostgreSQLConnection{
                "host":   datadir,
                "port":   port,
                "dbname": "postgres",
-       }.String())
+       }
+       if iamroot {
+               pgconn["user"] = "postgres"
+       }
+       db, err := sql.Open("postgres", pgconn.String())
        if err != nil {
                return fmt.Errorf("db open failed: %s", err)
        }
@@ -96,7 +154,7 @@ func (runPostgreSQL) Run(ctx context.Context, fail func(error), super *Superviso
        if err != nil {
                return fmt.Errorf("createuser failed: %s", err)
        }
-       _, err = conn.ExecContext(ctx, `CREATE DATABASE `+pq.QuoteIdentifier(super.cluster.PostgreSQL.Connection["dbname"]))
+       _, err = conn.ExecContext(ctx, `CREATE DATABASE `+pq.QuoteIdentifier(super.cluster.PostgreSQL.Connection["dbname"])+` WITH TEMPLATE template0 ENCODING 'utf8'`)
        if err != nil {
                return fmt.Errorf("createdb failed: %s", err)
        }
index bcf87812ab813c39a0bbcc6c801b9069c9dd4c7b..7f5d6a9baae2dd4eaa2b2e66fea9585f7be3bdc1 100644 (file)
@@ -126,7 +126,7 @@ func (super *Supervisor) run(cfg *arvados.Config) error {
        super.setEnv("ARVADOS_CONFIG", super.configfile)
        super.setEnv("RAILS_ENV", super.ClusterType)
        super.setEnv("TMPDIR", super.tempdir)
-       super.prependEnv("PATH", filepath.Join(super.tempdir, "bin")+":")
+       super.prependEnv("PATH", super.tempdir+"/bin:/var/lib/arvados/bin:")
 
        super.cluster, err = cfg.GetCluster("")
        if err != nil {
@@ -182,7 +182,7 @@ func (super *Supervisor) run(cfg *arvados.Config) error {
                runGoProgram{src: "services/keepproxy", svc: super.cluster.Services.Keepproxy, depends: []supervisedTask{runPassenger{src: "services/api"}}},
                runGoProgram{src: "services/keepstore", svc: super.cluster.Services.Keepstore},
                runGoProgram{src: "services/keep-web", svc: super.cluster.Services.WebDAV},
-               runGoProgram{src: "services/ws", svc: super.cluster.Services.Websocket, depends: []supervisedTask{runPostgreSQL{}}},
+               runServiceCommand{name: "ws", svc: super.cluster.Services.Websocket, depends: []supervisedTask{runPostgreSQL{}}},
                installPassenger{src: "services/api"},
                runPassenger{src: "services/api", svc: super.cluster.Services.RailsAPI, depends: []supervisedTask{createCertificates{}, runPostgreSQL{}, installPassenger{src: "services/api"}}},
                installPassenger{src: "apps/workbench", depends: []supervisedTask{installPassenger{src: "services/api"}}}, // dependency ensures workbench doesn't delay api startup
@@ -360,7 +360,11 @@ func (super *Supervisor) setupRubyEnv() error {
                        "GEM_HOME=",
                        "GEM_PATH=",
                })
-               cmd := exec.Command("gem", "env", "gempath")
+               gem := "gem"
+               if _, err := os.Stat("/var/lib/arvados/bin/gem"); err == nil {
+                       gem = "/var/lib/arvados/bin/gem"
+               }
+               cmd := exec.Command(gem, "env", "gempath")
                cmd.Env = super.environ
                buf, err := cmd.Output() // /var/lib/arvados/.gem/ruby/2.5.0/bin:...
                if err != nil || len(buf) == 0 {
@@ -406,7 +410,11 @@ func (super *Supervisor) RunProgram(ctx context.Context, dir string, output io.W
        cmdline := fmt.Sprintf("%s", append([]string{prog}, args...))
        super.logger.WithField("command", cmdline).WithField("dir", dir).Info("executing")
 
-       logprefix := strings.TrimPrefix(prog, super.tempdir+"/bin/")
+       logprefix := prog
+       if logprefix == "setuidgid" && len(args) >= 3 {
+               logprefix = args[2]
+       }
+       logprefix = strings.TrimPrefix(logprefix, super.tempdir+"/bin/")
        if logprefix == "bundle" && len(args) > 2 && args[0] == "exec" {
                logprefix = args[1]
        } else if logprefix == "arvados-server" && len(args) > 1 {
index d62ffe2fd40036af86fd65af1823931e22311259..01f2161632bf8e6562f51b4266e43602b90218c6 100644 (file)
@@ -67,6 +67,10 @@ func (h *Handler) CheckHealth() error {
        return err
 }
 
+func (h *Handler) Done() <-chan struct{} { // NOTE(review): presumably part of the service.Handler interface -- confirm
+       return nil // a receive from a nil channel blocks forever, so this handler never signals completion
+}
+
 func neverRedirect(*http.Request, []*http.Request) error { return http.ErrUseLastResponse }
 
 func (h *Handler) setup() {
index 4023896f7933dbbd489387405419097dc083434e..02b6c976aec825f810eab3cca43488c808d5cc4e 100644 (file)
@@ -82,6 +82,11 @@ func (disp *dispatcher) CheckHealth() error {
        return disp.pool.CheckHealth()
 }
 
+// Done implements service.Handler. It returns disp.stopped (declared outside this hunk; presumably closed once the dispatcher has stopped -- confirm).
+func (disp *dispatcher) Done() <-chan struct{} {
+       return disp.stopped
+}
+
 // Stop dispatching containers and release resources. Typically used
 // in tests.
 func (disp *dispatcher) Close() {
diff --git a/lib/install/arvadostest_docker_build.sh b/lib/install/arvadostest_docker_build.sh
new file mode 100755 (executable)
index 0000000..e0defa8
--- /dev/null
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+set -ex -o pipefail # echo each command; abort on the first failing command or pipeline stage
+
+SRC=$(realpath $(dirname ${BASH_SOURCE[0]})/../..) # root of the source tree: two directories above this script
+
+ctrname=arvadostest # used both as the build container's name and as the name of the committed image
+ctrbase=${ctrname} # image to start from; defaults to the previously committed image
+if [[ "${1}" != "--update" ]] || ! docker images --format={{.Repository}} | grep -x ${ctrbase}; then # no --update, or no existing image to update
+    ctrbase=debian:10
+fi
+
+if docker ps -a --format={{.Names}} | grep -x ${ctrname}; then # "ps -a" also lists stopped containers
+    echo >&2 "container name already in use -- another builder running?"
+    exit 1
+fi
+
+(cd ${SRC}/cmd/arvados-server && go install) # build on the host; the binary is expected in ${GOPATH:-$HOME/go}/bin, which is bind-mounted below
+trap "docker rm --volumes ${ctrname}" ERR # if a later step fails, remove the half-built container
+docker run -it --name ${ctrname} \
+       -v ${GOPATH:-${HOME}/go}/bin/arvados-server:/bin/arvados-server:ro \
+       -v ${SRC}:/src/arvados:ro \
+       -v /tmp \
+       --env http_proxy \
+       --env https_proxy \
+       ${ctrbase} \
+       bash -c "
+set -ex -o pipefail
+arvados-server install -type test
+pg_ctlcluster 11 main start
+cp -a /src/arvados /tmp/
+cd /tmp/arvados
+rm -rf tmp config.yml database.yml services/api/config/database.yml
+mkdir tmp
+build/run-tests.sh WORKSPACE=\$PWD --temp /tmp/arvados/tmp --only x"
+docker commit ${ctrname} ${ctrname} # snapshot the container's filesystem as image "arvadostest"
+trap - ERR # build succeeded: disable the failure cleanup
+docker rm --volumes ${ctrname} # the container is no longer needed once committed
diff --git a/lib/install/arvadostest_docker_run.sh b/lib/install/arvadostest_docker_run.sh
new file mode 100755 (executable)
index 0000000..ca53655
--- /dev/null
@@ -0,0 +1,34 @@
+#!/bin/bash
+
+# Example:
+#
+# ./arvadostest_docker_build.sh             # build the base image ("arvadostest")
+# ./arvadostest_docker_build.sh --update    # update the base image with current version of `arvados-server install`
+# ./arvadostest_docker_run.sh --interactive # start a container using the previously built base image, copy this source tree into it, and invoke run-tests.sh with the given args
+
+set -ex -o pipefail
+
+declare -a qargs
+for arg in "$@"; do
+    qargs+=("${arg@Q}")
+done
+
+SRC=$(realpath $(dirname ${BASH_SOURCE[0]})/../..)
+
+docker run --rm -it \
+       --privileged \
+       -v /dev/fuse:/dev/fuse \
+       -v ${SRC}:/src/arvados:ro \
+       -v /tmp \
+       --env http_proxy \
+       --env https_proxy \
+       arvadostest \
+       bash -c "
+set -ex -o pipefail
+pg_ctlcluster 11 main start
+cp -a /src/arvados /tmp/
+cd /tmp/arvados
+rm -rf tmp config.yml database.yml services/api/config/database.yml
+mkdir tmp
+go run ./cmd/arvados-server install -type test
+build/run-tests.sh WORKSPACE=\$PWD --temp /tmp/arvados/tmp ${qargs[@]}"
diff --git a/lib/install/deps.go b/lib/install/deps.go
new file mode 100644 (file)
index 0000000..cbcf743
--- /dev/null
@@ -0,0 +1,417 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package install
+
+import (
+       "bufio"
+       "bytes"
+       "context"
+       "errors"
+       "flag"
+       "fmt"
+       "io"
+       "os"
+       "os/exec"
+       "strconv"
+       "strings"
+       "syscall"
+       "time"
+
+       "git.arvados.org/arvados.git/lib/cmd"
+       "git.arvados.org/arvados.git/sdk/go/ctxlog"
+       "github.com/lib/pq"
+)
+
+var Command cmd.Handler = installCommand{} // Command is the handler registered as the "install" subcommand in cmd/arvados-server/cmd.go
+
+const devtestDatabasePassword = "insecure_arvados_test" // NOTE(review): not referenced in the visible part of this file; presumably the database password for development/test clusters -- confirm
+
+type installCommand struct{} // stateless receiver type for RunCommand
+
+func (installCommand) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
+       logger := ctxlog.New(stderr, "text", "info")
+       ctx := ctxlog.Context(context.Background(), logger)
+       ctx, cancel := context.WithCancel(ctx)
+       defer cancel()
+
+       var err error
+       defer func() {
+               if err != nil {
+                       logger.WithError(err).Info("exiting")
+               }
+       }()
+
+       flags := flag.NewFlagSet(prog, flag.ContinueOnError)
+       flags.SetOutput(stderr)
+       versionFlag := flags.Bool("version", false, "Write version information to stdout and exit 0")
+       clusterType := flags.String("type", "production", "cluster `type`: development, test, or production")
+       err = flags.Parse(args)
+       if err == flag.ErrHelp {
+               err = nil
+               return 0
+       } else if err != nil {
+               return 2
+       } else if *versionFlag {
+               return cmd.Version.RunCommand(prog, args, stdin, stdout, stderr)
+       }
+
+       var dev, test, prod bool
+       switch *clusterType {
+       case "development":
+               dev = true
+       case "test":
+               test = true
+       case "production":
+               prod = true
+       default:
+               err = fmt.Errorf("invalid cluster type %q (must be 'development', 'test', or 'production')", *clusterType)
+               return 2
+       }
+
+       if prod {
+               err = errors.New("production install is not yet implemented")
+               return 1
+       }
+
+       osv, err := identifyOS()
+       if err != nil {
+               return 1
+       }
+
+       listdir, err := os.Open("/var/lib/apt/lists")
+       if err != nil {
+               logger.Warnf("error while checking whether to run apt-get update: %s", err)
+       } else if names, _ := listdir.Readdirnames(1); len(names) == 0 {
+               // Special case for a base docker image where the
+               // package cache has been deleted and all "apt-get
+               // install" commands will fail unless we fetch repos.
+               cmd := exec.CommandContext(ctx, "apt-get", "update")
+               cmd.Stdout = stdout
+               cmd.Stderr = stderr
+               err = cmd.Run()
+               if err != nil {
+                       return 1
+               }
+       }
+
+       if dev || test {
+               debs := []string{
+                       "bison",
+                       "bsdmainutils",
+                       "build-essential",
+                       "ca-certificates",
+                       "cadaver",
+                       "curl",
+                       "cython",
+                       "daemontools", // lib/boot uses setuidgid to drop privileges when running as root
+                       "default-jdk-headless",
+                       "default-jre-headless",
+                       "fuse",
+                       "gettext",
+                       "git",
+                       "gitolite3",
+                       "graphviz",
+                       "haveged",
+                       "iceweasel",
+                       "libattr1-dev",
+                       "libcrypt-ssleay-perl",
+                       "libcrypt-ssleay-perl",
+                       "libcurl3-gnutls",
+                       "libcurl4-openssl-dev",
+                       "libfuse-dev",
+                       "libgnutls28-dev",
+                       "libjson-perl",
+                       "libjson-perl",
+                       "libpam-dev",
+                       "libpcre3-dev",
+                       "libpq-dev",
+                       "libpython2.7-dev",
+                       "libreadline-dev",
+                       "libssl-dev",
+                       "libwww-perl",
+                       "libxml2-dev",
+                       "libxslt1.1",
+                       "linkchecker",
+                       "lsof",
+                       "net-tools",
+                       "nginx",
+                       "pandoc",
+                       "perl-modules",
+                       "pkg-config",
+                       "postgresql",
+                       "postgresql-contrib",
+                       "python",
+                       "python3-dev",
+                       "python-epydoc",
+                       "r-base",
+                       "r-cran-testthat",
+                       "sudo",
+                       "virtualenv",
+                       "wget",
+                       "xvfb",
+                       "zlib1g-dev",
+               }
+               switch {
+               case osv.Debian && osv.Major >= 10:
+                       debs = append(debs, "libcurl4")
+               default:
+                       debs = append(debs, "libcurl3")
+               }
+               cmd := exec.CommandContext(ctx, "apt-get", "install", "--yes", "--no-install-recommends")
+               cmd.Args = append(cmd.Args, debs...)
+               cmd.Env = append(os.Environ(), "DEBIAN_FRONTEND=noninteractive")
+               cmd.Stdout = stdout
+               cmd.Stderr = stderr
+               err = cmd.Run()
+               if err != nil {
+                       return 1
+               }
+       }
+
+       os.Mkdir("/var/lib/arvados", 0755)
+       rubyversion := "2.5.7"
+       if haverubyversion, err := exec.Command("/var/lib/arvados/bin/ruby", "-v").CombinedOutput(); err == nil && bytes.HasPrefix(haverubyversion, []byte("ruby "+rubyversion)) {
+               logger.Print("ruby " + rubyversion + " already installed")
+       } else {
+               err = runBash(`
+mkdir -p /var/lib/arvados/tmp
+tmp=/var/lib/arvados/tmp/ruby-`+rubyversion+`
+trap "rm -r ${tmp}" ERR
+wget --progress=dot:giga -O- https://cache.ruby-lang.org/pub/ruby/2.5/ruby-`+rubyversion+`.tar.gz | tar -C /var/lib/arvados/tmp -xzf -
+cd ${tmp}
+./configure --disable-install-doc --prefix /var/lib/arvados
+make -j4
+make install
+/var/lib/arvados/bin/gem install bundler
+rm -r ${tmp}
+`, stdout, stderr)
+               if err != nil {
+                       return 1
+               }
+       }
+
+       if !prod {
+               goversion := "1.14"
+               if havegoversion, err := exec.Command("/usr/local/bin/go", "version").CombinedOutput(); err == nil && bytes.HasPrefix(havegoversion, []byte("go version go"+goversion+" ")) {
+                       logger.Print("go " + goversion + " already installed")
+               } else {
+                       err = runBash(`
+cd /tmp
+wget --progress=dot:giga -O- https://storage.googleapis.com/golang/go`+goversion+`.linux-amd64.tar.gz | tar -C /var/lib/arvados -xzf -
+ln -sf /var/lib/arvados/go/bin/* /usr/local/bin/
+`, stdout, stderr)
+                       if err != nil {
+                               return 1
+                       }
+               }
+
+               pjsversion := "1.9.8"
+               if havepjsversion, err := exec.Command("/usr/local/bin/phantomjs", "--version").CombinedOutput(); err == nil && string(havepjsversion) == "1.9.8\n" {
+                       logger.Print("phantomjs " + pjsversion + " already installed")
+               } else {
+                       err = runBash(`
+PJS=phantomjs-`+pjsversion+`-linux-x86_64
+wget --progress=dot:giga -O- https://bitbucket.org/ariya/phantomjs/downloads/$PJS.tar.bz2 | tar -C /var/lib/arvados -xjf -
+ln -sf /var/lib/arvados/$PJS/bin/phantomjs /usr/local/bin/
+`, stdout, stderr)
+                       if err != nil {
+                               return 1
+                       }
+               }
+
+               geckoversion := "0.24.0"
+               if havegeckoversion, err := exec.Command("/usr/local/bin/geckodriver", "--version").CombinedOutput(); err == nil && strings.Contains(string(havegeckoversion), " "+geckoversion+" ") {
+                       logger.Print("geckodriver " + geckoversion + " already installed")
+               } else {
+                       err = runBash(`
+GD=v`+geckoversion+`
+wget --progress=dot:giga -O- https://github.com/mozilla/geckodriver/releases/download/$GD/geckodriver-$GD-linux64.tar.gz | tar -C /var/lib/arvados/bin -xzf - geckodriver
+ln -sf /var/lib/arvados/bin/geckodriver /usr/local/bin/
+`, stdout, stderr)
+                       if err != nil {
+                               return 1
+                       }
+               }
+
+               nodejsversion := "v8.15.1"
+               if havenodejsversion, err := exec.Command("/usr/local/bin/node", "--version").CombinedOutput(); err == nil && string(havenodejsversion) == nodejsversion+"\n" {
+                       logger.Print("nodejs " + nodejsversion + " already installed")
+               } else {
+                       err = runBash(`
+NJS=`+nodejsversion+`
+wget --progress=dot:giga -O- https://nodejs.org/dist/${NJS}/node-${NJS}-linux-x64.tar.xz | sudo tar -C /var/lib/arvados -xJf -
+ln -sf /var/lib/arvados/node-${NJS}-linux-x64/bin/{node,npm} /usr/local/bin/
+`, stdout, stderr)
+                       if err != nil {
+                               return 1
+                       }
+               }
+
+               gradleversion := "5.3.1"
+               if havegradleversion, err := exec.Command("/usr/local/bin/gradle", "--version").CombinedOutput(); err == nil && strings.Contains(string(havegradleversion), "Gradle "+gradleversion+"\n") {
+                       logger.Print("gradle " + gradleversion + " already installed")
+               } else {
+                       err = runBash(`
+G=`+gradleversion+`
+mkdir -p /var/lib/arvados/tmp
+zip=/var/lib/arvados/tmp/gradle-${G}-bin.zip
+trap "rm ${zip}" ERR
+wget --progress=dot:giga -O${zip} https://services.gradle.org/distributions/gradle-${G}-bin.zip
+unzip -o -d /var/lib/arvados ${zip}
+ln -sf /var/lib/arvados/gradle-${G}/bin/gradle /usr/local/bin/
+rm ${zip}
+`, stdout, stderr)
+                       if err != nil {
+                               return 1
+                       }
+               }
+
+               // The entry in /etc/locale.gen is "en_US.UTF-8"; once
+               // it's installed, locale -a reports it as
+               // "en_US.utf8".
+               wantlocale := "en_US.UTF-8"
+               if havelocales, err := exec.Command("locale", "-a").CombinedOutput(); err == nil && bytes.Contains(havelocales, []byte(strings.Replace(wantlocale+"\n", "UTF-", "utf", 1))) {
+                       logger.Print("locale " + wantlocale + " already installed")
+               } else {
+                       err = runBash(`sed -i 's/^# *\(`+wantlocale+`\)/\1/' /etc/locale.gen && locale-gen`, stdout, stderr)
+                       if err != nil {
+                               return 1
+                       }
+               }
+
+               var pgc struct {
+                       Version       string
+                       Cluster       string
+                       Port          int
+                       Status        string
+                       Owner         string
+                       DataDirectory string
+                       LogFile       string
+               }
+               if pg_lsclusters, err2 := exec.Command("pg_lsclusters", "--no-header").CombinedOutput(); err2 != nil {
+                       err = fmt.Errorf("pg_lsclusters: %s", err2)
+                       return 1
+               } else if pgclusters := strings.Split(strings.TrimSpace(string(pg_lsclusters)), "\n"); len(pgclusters) != 1 {
+                       logger.Warnf("pg_lsclusters returned %d postgresql clusters -- skipping postgresql initdb/startup, hope that's ok", len(pgclusters))
+               } else if _, err = fmt.Sscanf(pgclusters[0], "%s %s %d %s %s %s %s", &pgc.Version, &pgc.Cluster, &pgc.Port, &pgc.Status, &pgc.Owner, &pgc.DataDirectory, &pgc.LogFile); err != nil {
+                       err = fmt.Errorf("error parsing pg_lsclusters output: %s", err)
+                       return 1
+               } else if pgc.Status == "online" {
+                       logger.Infof("postgresql cluster %s-%s is online", pgc.Version, pgc.Cluster)
+               } else {
+                       logger.Infof("postgresql cluster %s-%s is %s; trying to start", pgc.Version, pgc.Cluster, pgc.Status)
+                       cmd := exec.Command("pg_ctlcluster", "--foreground", pgc.Version, pgc.Cluster, "start")
+                       cmd.Stdout = stdout
+                       cmd.Stderr = stderr
+                       err = cmd.Start()
+                       if err != nil {
+                               return 1
+                       }
+                       defer func() {
+                               cmd.Process.Signal(syscall.SIGTERM)
+                               logger.Infof("sent SIGTERM; waiting for postgres to shut down")
+                               cmd.Wait()
+                       }()
+                       for deadline := time.Now().Add(10 * time.Second); ; {
+                               output, err2 := exec.Command("pg_isready").CombinedOutput()
+                               if err2 == nil {
+                                       break
+                               } else if time.Now().After(deadline) {
+                                       err = fmt.Errorf("timed out waiting for pg_isready (%q)", output)
+                                       return 1
+                               } else {
+                                       time.Sleep(time.Second)
+                               }
+                       }
+               }
+
+               if os.Getpid() == 1 {
+                       // We are the init process (presumably in a
+                       // docker container) so although postgresql is
+                       // installed, it's not running, and initdb
+                       // might never have been run.
+               }
+
+               withstuff := "WITH LOGIN SUPERUSER ENCRYPTED PASSWORD " + pq.QuoteLiteral(devtestDatabasePassword)
+               cmd := exec.Command("sudo", "-u", "postgres", "psql", "-c", "ALTER ROLE arvados "+withstuff)
+               cmd.Dir = "/"
+               if err := cmd.Run(); err == nil {
+                       logger.Print("arvados role exists; superuser privileges added, password updated")
+               } else {
+                       cmd := exec.Command("sudo", "-u", "postgres", "psql", "-c", "CREATE ROLE arvados "+withstuff)
+                       cmd.Dir = "/"
+                       cmd.Stdout = stdout
+                       cmd.Stderr = stderr
+                       err = cmd.Run()
+                       if err != nil {
+                               return 1
+                       }
+               }
+       }
+
+       return 0
+}
+
+// osversion describes the host operating system, as reported by
+// identifyOS.
+type osversion struct {
+       Debian bool // true when ID in /etc/os-release is "debian"
+       Ubuntu bool // true when ID in /etc/os-release is "ubuntu"
+       Major  int  // VERSION_ID, up to (not including) the first "."
+}
+
+// identifyOS reads /etc/os-release and reports which supported
+// distribution (Debian or Ubuntu) the host runs, and its major
+// version number.
+//
+// An error is returned if the file cannot be read, if a line is
+// neither blank, a "#" comment, nor KEY=VALUE, if ID is not
+// "debian" or "ubuntu", or if the part of VERSION_ID before the
+// first "." is not an integer.
+func identifyOS() (osversion, error) {
+       var osv osversion
+       f, err := os.Open("/etc/os-release")
+       if err != nil {
+               return osv, err
+       }
+       defer f.Close()
+
+       kv := map[string]string{}
+       scanner := bufio.NewScanner(f)
+       for scanner.Scan() {
+               line := strings.TrimSpace(scanner.Text())
+               // Blank lines and comments carry no data; os-release
+               // files are allowed to contain both.
+               if line == "" || strings.HasPrefix(line, "#") {
+                       continue
+               }
+               toks := strings.SplitN(line, "=", 2)
+               if len(toks) != 2 {
+                       return osv, fmt.Errorf("invalid line in /etc/os-release: %q", line)
+               }
+               k := toks[0]
+               // Strip surrounding double quotes; if there were
+               // none, strip surrounding single quotes instead.
+               v := strings.Trim(toks[1], `"`)
+               if v == toks[1] {
+                       v = strings.Trim(v, `'`)
+               }
+               kv[k] = v
+       }
+       if err = scanner.Err(); err != nil {
+               return osv, err
+       }
+       switch kv["ID"] {
+       case "ubuntu":
+               osv.Ubuntu = true
+       case "debian":
+               osv.Debian = true
+       default:
+               return osv, fmt.Errorf("unsupported ID in /etc/os-release: %q", kv["ID"])
+       }
+       // Only the major version matters: "18.04" -> "18", "10" -> "10".
+       vstr := kv["VERSION_ID"]
+       if i := strings.Index(vstr, "."); i > 0 {
+               vstr = vstr[:i]
+       }
+       osv.Major, err = strconv.Atoi(vstr)
+       if err != nil {
+               return osv, fmt.Errorf("incomprehensible VERSION_ID in /etc/os-release: %q", kv["VERSION_ID"])
+       }
+       return osv, nil
+}
+
+// runBash pipes script into a bash process on stdin, preceded by a
+// line that turns on errexit, xtrace and pipefail. The process's
+// stdout and stderr are connected to the given writers. The return
+// value is the error (if any) from running bash.
+func runBash(script string, stdout, stderr io.Writer) error {
+       const preamble = "set -ex -o pipefail\n"
+       bash := exec.Command("bash", "-")
+       bash.Stdin = bytes.NewBufferString(preamble + script)
+       bash.Stdout, bash.Stderr = stdout, stderr
+       return bash.Run()
+}
diff --git a/lib/install/deps_test.go b/lib/install/deps_test.go
new file mode 100644 (file)
index 0000000..5dfdbfe
--- /dev/null
@@ -0,0 +1,47 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+// Skip this slow test unless invoked as "go test -tags docker".
+// Depending on host/network speed, Go's default 10m test timeout
+// might be too short; recommend "go test -timeout 20m -tags docker".
+//
+// +build docker
+
+package install
+
+import (
+       "os"
+       "testing"
+
+       "gopkg.in/check.v1"
+)
+
+// Test is the "go test" entry point; it hands control to gocheck,
+// which runs the suites registered with check.Suite.
+func Test(t *testing.T) {
+       check.TestingT(t)
+}
+
+var _ = check.Suite(&Suite{})
+
+type Suite struct{}
+
+// TestInstallDeps builds arvados-server from the source tree two
+// directories above this package into a temporary directory, then
+// starts a debian:10 docker container with that binary and the
+// source tree mounted read-only, and runs "arvados-server install
+// -type test" followed by "arvados-server boot -type test ..."
+// inside it. The test passes if the whole script exits successfully.
+func (*Suite) TestInstallDeps(c *check.C) {
+       tmp := c.MkDir()
+       script := `
+set -x
+tmp="` + tmp + `"
+sourcepath="$(realpath ../..)"
+(cd ${sourcepath} && go build -o ${tmp} ./cmd/arvados-server)
+docker run -i --rm --workdir /arvados \
+       -v ${tmp}/arvados-server:/arvados-server:ro \
+       -v ${sourcepath}:/arvados:ro \
+       -v /arvados/apps/workbench/.bundle \
+       -v /arvados/services/api/.bundle \
+       -v /arvados/services/api/tmp \
+       --env http_proxy \
+       --env https_proxy \
+       debian:10 \
+       bash -c "/arvados-server install -type test && /arvados-server boot -type test -config doc/examples/config/zzzzz.yml -own-temporary-database -shutdown -timeout 9m"
+`
+       // Script output goes straight to the test's stdout/stderr.
+       c.Check(runBash(script, os.Stdout, os.Stderr), check.IsNil)
+}
diff --git a/lib/install/example_from_scratch.sh b/lib/install/example_from_scratch.sh
new file mode 100644 (file)
index 0000000..03d9b7f
--- /dev/null
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+set -e -o pipefail
+
+# Starting with a base debian buster system, like "docker run -it
+# debian:10"...
+
+apt update
+apt upgrade
+apt install --no-install-recommends build-essential ca-certificates git golang
+git clone https://git.arvados.org/arvados.git
+cd arvados
+[[ -e lib/install ]] || git checkout origin/16053-install-deps
+# Stay at the top of the source tree: the paths below are relative to it.
+go run ./cmd/arvados-server install -type test
+pg_isready || pg_ctlcluster 11 main start # only needed if there's no init process (as in docker)
+build/run-tests.sh
index 7f2f78ee9a9f7224aac4aacba94148497f292a5e..1e7a9a36edd3a8142192d14bfcfbf12885e1e857 100644 (file)
@@ -29,6 +29,7 @@ import (
 type Handler interface {
        http.Handler
        CheckHealth() error
+       // Done returns a channel that closes when the handler
+       // shuts down. RunCommand closes the server when that
+       // happens.
+       Done() <-chan struct{}
 }
 
 type NewHandlerFunc func(_ context.Context, _ *arvados.Cluster, token string, registry *prometheus.Registry) Handler
@@ -148,9 +149,15 @@ func (c *command) RunCommand(prog string, args []string, stdin io.Reader, stdout
                logger.WithError(err).Errorf("error notifying init daemon")
        }
        go func() {
+               // Shut down server if caller cancels context
                <-ctx.Done()
                srv.Close()
        }()
+       go func() {
+               // Shut down server if handler dies
+               <-handler.Done()
+               srv.Close()
+       }()
        err = srv.Wait()
        if err != nil {
                return 1
index 86039c4dd1fa111d2de292676f4773c9bdc203a1..ec7834972c2609aeb5e4cd14099d35367a7e3c09 100644 (file)
@@ -135,6 +135,7 @@ type testHandler struct {
        healthCheck chan bool
 }
 
+// Done returns a nil channel, which never becomes ready to receive.
+func (th *testHandler) Done() <-chan struct{}                            { return nil }
 func (th *testHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) { th.handler.ServeHTTP(w, r) }
 func (th *testHandler) CheckHealth() error {
        ctxlog.FromContext(th.ctx).Info("CheckHealth called")
index c4049f7064d70a5a88c654be10cff478bb0f42f3..a4d7370d1b8d0d6a3b630025a181a56c029ed3b0 100644 (file)
@@ -36,3 +36,15 @@ func (eh errorHandler) ServeHTTP(w http.ResponseWriter, r *http.Request) {
 func (eh errorHandler) CheckHealth() error {
        return eh.err
 }
+
+// Done returns an already-closed channel (doneChannel) to indicate
+// the service has stopped/failed: a receive from it completes
+// immediately.
+func (eh errorHandler) Done() <-chan struct{} {
+       return doneChannel
+}
+
+// doneChannel is a channel that is created and closed once, when the
+// package is initialized. It is returned by errorHandler.Done.
+var doneChannel = func() <-chan struct{} {
+       done := make(chan struct{})
+       close(done)
+       return done
+}()
index 71f6f85bff4754aa5d9d038fd4fb8b900d4f8cd8..6b83fb96d49e6359e656c3e634a273b6f29c4e16 100644 (file)
@@ -424,6 +424,24 @@ var errDuplicateInstanceTypeName = errors.New("duplicate instance type name")
 // UnmarshalJSON handles old config files that provide an array of
 // instance types instead of a hash.
 func (it *InstanceTypeMap) UnmarshalJSON(data []byte) error {
+       fixup := func(t InstanceType) (InstanceType, error) {
+               if t.ProviderType == "" {
+                       t.ProviderType = t.Name
+               }
+               if t.Scratch == 0 {
+                       t.Scratch = t.IncludedScratch + t.AddedScratch
+               } else if t.AddedScratch == 0 {
+                       t.AddedScratch = t.Scratch - t.IncludedScratch
+               } else if t.IncludedScratch == 0 {
+                       t.IncludedScratch = t.Scratch - t.AddedScratch
+               }
+
+               if t.Scratch != (t.IncludedScratch + t.AddedScratch) {
+                       return t, fmt.Errorf("InstanceType %q: Scratch != (IncludedScratch + AddedScratch)", t.Name)
+               }
+               return t, nil
+       }
+
        if len(data) > 0 && data[0] == '[' {
                var arr []InstanceType
                err := json.Unmarshal(data, &arr)
@@ -439,19 +457,9 @@ func (it *InstanceTypeMap) UnmarshalJSON(data []byte) error {
                        if _, ok := (*it)[t.Name]; ok {
                                return errDuplicateInstanceTypeName
                        }
-                       if t.ProviderType == "" {
-                               t.ProviderType = t.Name
-                       }
-                       if t.Scratch == 0 {
-                               t.Scratch = t.IncludedScratch + t.AddedScratch
-                       } else if t.AddedScratch == 0 {
-                               t.AddedScratch = t.Scratch - t.IncludedScratch
-                       } else if t.IncludedScratch == 0 {
-                               t.IncludedScratch = t.Scratch - t.AddedScratch
-                       }
-
-                       if t.Scratch != (t.IncludedScratch + t.AddedScratch) {
-                               return fmt.Errorf("%v: Scratch != (IncludedScratch + AddedScratch)", t.Name)
+                       t, err := fixup(t)
+                       if err != nil {
+                               return err
                        }
                        (*it)[t.Name] = t
                }
@@ -467,8 +475,9 @@ func (it *InstanceTypeMap) UnmarshalJSON(data []byte) error {
        *it = InstanceTypeMap(hash)
        for name, t := range *it {
                t.Name = name
-               if t.ProviderType == "" {
-                       t.ProviderType = name
+               t, err := fixup(t)
+               if err != nil {
+                       return err
                }
                (*it)[name] = t
        }
index b984cb5669ce851f2ec1f136a9c96bfb0d06b832..e4d26e03fd3f8101ad339f648b1efbaa56208437 100644 (file)
@@ -45,3 +45,29 @@ func (s *ConfigSuite) TestInstanceTypeSize(c *check.C) {
        c.Check(int64(it.Scratch), check.Equals, int64(4000000000))
        c.Check(int64(it.RAM), check.Equals, int64(4294967296))
 }
+
+// TestInstanceTypeFixup checks that missing ProviderType and scratch
+// fields are filled in the same way whether instance types are given
+// in the current (map) format or the legacy (array) format.
+func (s *ConfigSuite) TestInstanceTypeFixup(c *check.C) {
+       for _, confdata := range []string{
+               // Current format: map of entries
+               `{foo4: {IncludedScratch: 4GB}, foo8: {ProviderType: foo_8, Scratch: 8GB}}`,
+               // Legacy format: array of entries with key in "Name" field
+               `[{Name: foo4, IncludedScratch: 4GB}, {Name: foo8, ProviderType: foo_8, Scratch: 8GB}]`,
+       } {
+               c.Log(confdata)
+               var itm InstanceTypeMap
+               err := yaml.Unmarshal([]byte(confdata), &itm)
+               c.Check(err, check.IsNil)
+
+               // foo4: only IncludedScratch given, so Scratch is
+               // derived from it and ProviderType defaults to the
+               // name.
+               c.Check(itm["foo4"].Name, check.Equals, "foo4")
+               c.Check(itm["foo4"].ProviderType, check.Equals, "foo4")
+               c.Check(itm["foo4"].Scratch, check.Equals, ByteSize(4000000000))
+               c.Check(itm["foo4"].AddedScratch, check.Equals, ByteSize(0))
+               c.Check(itm["foo4"].IncludedScratch, check.Equals, ByteSize(4000000000))
+
+               // foo8: only Scratch given, so all of it is
+               // attributed to AddedScratch; the explicit
+               // ProviderType is kept.
+               c.Check(itm["foo8"].Name, check.Equals, "foo8")
+               c.Check(itm["foo8"].ProviderType, check.Equals, "foo_8")
+               c.Check(itm["foo8"].Scratch, check.Equals, ByteSize(8000000000))
+               c.Check(itm["foo8"].AddedScratch, check.Equals, ByteSize(8000000000))
+               c.Check(itm["foo8"].IncludedScratch, check.Equals, ByteSize(0))
+       }
+}
index a0284e8f247a60f8d2fd57b752f37a800d54c222..794adabdd3926b6b04036a6c62b1044f2e8f13d5 100644 (file)
@@ -46,6 +46,10 @@ func (agg *Aggregator) CheckHealth() error {
        return nil
 }
 
+// Done returns a nil channel, which never becomes ready to receive.
+func (agg *Aggregator) Done() <-chan struct{} {
+       return nil
+}
+
 func (agg *Aggregator) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
        agg.setupOnce.Do(agg.setup)
        sendErr := func(statusCode int, err error) {
index 8886f9517dfd5983032235e713a000f5615880b7..59981e3e55265be4eed1827d3570391533ac3a30 100644 (file)
@@ -53,10 +53,22 @@ func LogRequests(h http.Handler) http.Handler {
 
                logRequest(w, req, lgr)
                defer logResponse(w, req, lgr)
-               h.ServeHTTP(w, req)
+               h.ServeHTTP(rewrapResponseWriter(w, wrapped), req)
        })
 }
 
+// rewrapResponseWriter restores an additional interface provided by
+// wrapped: if wrapped implements http.Hijacker, the returned value
+// is w combined with that Hijacker; otherwise w is returned as is.
+func rewrapResponseWriter(w http.ResponseWriter, wrapped http.ResponseWriter) http.ResponseWriter {
+       hijacker, canHijack := wrapped.(http.Hijacker)
+       if !canHijack {
+               return w
+       }
+       return struct {
+               http.ResponseWriter
+               http.Hijacker
+       }{w, hijacker}
+}
+
 func Logger(req *http.Request) logrus.FieldLogger {
        return ctxlog.FromContext(req.Context())
 }
index 89f8a0cbb1b0ee812bb96ddf2ad6581f567867e2..5b09db948aad5fe1f7866b87bdd595cd9d976200 100644 (file)
@@ -21,7 +21,7 @@ dependencies {
     api 'com.typesafe:config:1.3.2'
     
     testImplementation 'junit:junit:4.12'
-    testImplementation 'org.mockito:mockito-core:2.12.0'
+    testImplementation 'org.mockito:mockito-core:3.3.3'
     testImplementation 'org.assertj:assertj-core:3.8.0'
     testImplementation 'com.squareup.okhttp3:mockwebserver:3.9.1'
 }
index 262b9d2a2cbc7f0925867e9a522b8402e0386bd2..22d4f62ea0fd1bf5a8d6718e2d410b79d5377d72 100644 (file)
@@ -430,7 +430,8 @@ def run_ws():
     stop_ws()
     port = internal_port_from_config("Websocket")
     logf = open(_logfilename('ws'), 'a')
-    ws = subprocess.Popen(["ws"],
+    ws = subprocess.Popen(
+        ["arvados-server", "ws"],
         stdin=open('/dev/null'), stdout=logf, stderr=logf, close_fds=True)
     with open(_pidfile('ws'), 'w') as f:
         f.write(str(ws.pid))
@@ -661,11 +662,22 @@ def setup_config():
     keep_web_dl_port = find_available_port()
     keep_web_dl_external_port = find_available_port()
 
-    dbconf = os.path.join(os.environ["CONFIGSRC"], "config.yml")
-
-    print("Getting config from %s" % dbconf, file=sys.stderr)
-
-    pgconnection = yaml.safe_load(open(dbconf))["Clusters"]["zzzzz"]["PostgreSQL"]["Connection"]
+    configsrc = os.environ.get("CONFIGSRC", None)
+    if configsrc:
+        clusterconf = os.path.join(configsrc, "config.yml")
+        print("Getting config from %s" % clusterconf, file=sys.stderr)
+        pgconnection = yaml.safe_load(open(clusterconf))["Clusters"]["zzzzz"]["PostgreSQL"]["Connection"]
+    else:
+        # assume "arvados-server install -type test" has set up the
+        # conventional db credentials
+        pgconnection = {
+           "client_encoding": "utf8",
+           "host": "localhost",
+           "dbname": "arvados_test",
+           "user": "arvados",
+           "password": "insecure_arvados_test",
+            "template": "template0", # used by RailsAPI when [re]creating the database
+        }
 
     localhost = "127.0.0.1"
     services = {
index 68158d760ee785a501d75e931ac5635109f32c13..27e3cf6330728b8fa523d51d2197885cc1229070 100644 (file)
@@ -832,7 +832,9 @@ class KeepClientTimeout(keepstub.StubKeepServers, unittest.TestCase):
         kc = self.keepClient()
         loc = kc.put(self.DATA, copies=1, num_retries=0)
         self.server.setbandwidth(self.BANDWIDTH_LOW_LIM)
-        self.server.setdelays(response=self.TIMEOUT_TIME)
+        # Note the actual delay must be 1s longer than the low speed
+        # limit interval in order for curl to detect it reliably.
+        self.server.setdelays(response=self.TIMEOUT_TIME+1)
         with self.assertTakesGreater(self.TIMEOUT_TIME):
             with self.assertRaises(arvados.errors.KeepReadError):
                 kc.get(loc, num_retries=0)
@@ -846,7 +848,9 @@ class KeepClientTimeout(keepstub.StubKeepServers, unittest.TestCase):
         kc = self.keepClient()
         loc = kc.put(self.DATA, copies=1, num_retries=0)
         self.server.setbandwidth(self.BANDWIDTH_LOW_LIM)
-        self.server.setdelays(mid_write=self.TIMEOUT_TIME, mid_read=self.TIMEOUT_TIME)
+        # Note the actual delay must be 1s longer than the low speed
+        # limit interval in order for curl to detect it reliably.
+        self.server.setdelays(mid_write=self.TIMEOUT_TIME+1, mid_read=self.TIMEOUT_TIME+1)
         with self.assertTakesGreater(self.TIMEOUT_TIME):
             with self.assertRaises(arvados.errors.KeepReadError) as e:
                 kc.get(loc, num_retries=0)
index 05658b5e5d7f17a4dcd9bd099d81d68950f97a8f..9801a3fd45d5d13ec40bf661c59b4de5156cfeed 100644 (file)
@@ -53,6 +53,11 @@ func (srv *Server) CheckHealth() error {
        return nil
 }
 
+// Done implements service.Handler. It returns a nil channel, which
+// never becomes ready to receive.
+func (srv *Server) Done() <-chan struct{} {
+       return nil
+}
+
 func (srv *Server) run() {
        var err error
        if srv.RunOptions.Once {
index e0509393cff077e119b6ea7b975d10f90115d536..be2639773650fe5312ef80759cc427bd5cf0c14c 100644 (file)
@@ -132,6 +132,10 @@ func (h *handler) CheckHealth() error {
        return h.err
 }
 
+// Done returns a nil channel, which never becomes ready to receive.
+func (h *handler) Done() <-chan struct{} {
+       return nil
+}
+
 func newHandlerOrErrorHandler(ctx context.Context, cluster *arvados.Cluster, token string, reg *prometheus.Registry) service.Handler {
        var h handler
        serviceURL, ok := service.URLFromContext(ctx)
index a60aa416a90b79d36d55a6aacf9c50de827af7bf..7777363b9d13815ab3036ae916a2c0f6989eb95f 100644 (file)
@@ -13,7 +13,6 @@ import (
        "io"
        "io/ioutil"
        "os"
-       "strings"
        "sync"
        "syscall"
        "time"
@@ -168,11 +167,10 @@ func (s *UnixVolumeSuite) TestPutBadVolume(c *check.C) {
        v := s.newTestableUnixVolume(c, s.cluster, arvados.Volume{Replication: 1}, s.metrics, false)
        defer v.Teardown()
 
-       os.Chmod(v.Root, 000)
-       err := v.Put(context.Background(), TestHash, TestBlock)
-       if err == nil {
-               c.Error("Write should have failed")
-       }
+       err := os.RemoveAll(v.Root)
+       c.Assert(err, check.IsNil)
+       err = v.Put(context.Background(), TestHash, TestBlock)
+       c.Check(err, check.IsNil)
 }
 
 func (s *UnixVolumeSuite) TestUnixVolumeReadonly(c *check.C) {
@@ -330,11 +328,14 @@ func (s *UnixVolumeSuite) TestUnixVolumeCompare(c *check.C) {
                c.Errorf("Got err %q, expected %q", err, DiskHashError)
        }
 
-       p := fmt.Sprintf("%s/%s/%s", v.Root, TestHash[:3], TestHash)
-       os.Chmod(p, 000)
-       err = v.Compare(context.Background(), TestHash, TestBlock)
-       if err == nil || strings.Index(err.Error(), "permission denied") < 0 {
-               c.Errorf("Got err %q, expected %q", err, "permission denied")
+       if os.Getuid() == 0 {
+               c.Log("skipping 'permission denied' check when running as root")
+       } else {
+               p := fmt.Sprintf("%s/%s/%s", v.Root, TestHash[:3], TestHash)
+               err = os.Chmod(p, 000)
+               c.Assert(err, check.IsNil)
+               err = v.Compare(context.Background(), TestHash, TestBlock)
+               c.Check(err, check.ErrorMatches, ".*permission denied.*")
        }
 }
 
index 806c3355da6c693350493a7471bc59e270bfb1e3..6a86cbe7a8307e1683dbd09ea506bc8cd79f52e3 100644 (file)
 // Developer info
 //
 // See https://dev.arvados.org/projects/arvados/wiki/Hacking_websocket_server.
-//
-// Usage
-//
-//     arvados-ws [-legacy-ws-config /etc/arvados/ws/ws.yml] [-dump-config]
-//
-// Options
-//
-// -legacy-ws-config path
-//
-// Load legacy configuration from the given file instead of the default
-// /etc/arvados/ws/ws.yml, legacy config overrides the clusterwide config.yml.
-//
-// -dump-config
-//
-// Print the loaded configuration to stdout and exit.
-//
-// Logs
-//
-// Logs are printed to stderr, formatted as JSON.
-//
-// A log is printed each time a client connects or disconnects.
-//
-// Enable additional logs by configuring:
-//
-//     LogLevel: debug
-//
-// Runtime status
-//
-// GET /debug.json responds with debug stats.
-//
-// GET /status.json responds with health check results and
-// activity/usage metrics.
-package main
+package ws
index ae545c092cf8ddece45cfbebdddb542e08de16b4..c989c0ca559b1a1cff472b2cc1bdb95b4fd021ce 100644 (file)
@@ -2,7 +2,7 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package main
+package ws
 
 import (
        "database/sql"
@@ -11,6 +11,7 @@ import (
 
        "git.arvados.org/arvados.git/sdk/go/arvados"
        "github.com/ghodss/yaml"
+       "github.com/sirupsen/logrus"
 )
 
 type eventSink interface {
@@ -31,6 +32,7 @@ type event struct {
        Serial   uint64
 
        db     *sql.DB
+       logger logrus.FieldLogger
        logRow *arvados.Log
        err    error
        mtx    sync.Mutex
@@ -57,12 +59,12 @@ func (e *event) Detail() *arvados.Log {
                &logRow.CreatedAt,
                &propYAML)
        if e.err != nil {
-               logger(nil).WithField("LogID", e.LogID).WithError(e.err).Error("QueryRow failed")
+               e.logger.WithField("LogID", e.LogID).WithError(e.err).Error("QueryRow failed")
                return nil
        }
        e.err = yaml.Unmarshal(propYAML, &logRow.Properties)
        if e.err != nil {
-               logger(nil).WithField("LogID", e.LogID).WithError(e.err).Error("yaml decode failed")
+               e.logger.WithField("LogID", e.LogID).WithError(e.err).Error("yaml decode failed")
                return nil
        }
        e.logRow = &logRow
index 3a82bf62b3e9351a95d2abe4c56ae942fededa4c..3593c3aebd58ceae6932e9667eca43aba8a8c0cf 100644 (file)
@@ -2,7 +2,7 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package main
+package ws
 
 import (
        "context"
@@ -11,17 +11,20 @@ import (
        "fmt"
        "strconv"
        "sync"
-       "sync/atomic"
        "time"
 
        "git.arvados.org/arvados.git/sdk/go/stats"
        "github.com/lib/pq"
+       "github.com/prometheus/client_golang/prometheus"
+       "github.com/sirupsen/logrus"
 )
 
 type pgEventSource struct {
        DataSource   string
        MaxOpenConns int
        QueueSize    int
+       Logger       logrus.FieldLogger
+       Reg          *prometheus.Registry
 
        db         *sql.DB
        pqListener *pq.Listener
@@ -30,8 +33,8 @@ type pgEventSource struct {
        mtx        sync.Mutex
 
        lastQDelay time.Duration
-       eventsIn   uint64
-       eventsOut  uint64
+       eventsIn   prometheus.Counter
+       eventsOut  prometheus.Counter
 
        cancel func()
 
@@ -39,18 +42,16 @@ type pgEventSource struct {
        ready     chan bool
 }
 
-var _ debugStatuser = (*pgEventSource)(nil)
-
 func (ps *pgEventSource) listenerProblem(et pq.ListenerEventType, err error) {
        if et == pq.ListenerEventConnected {
-               logger(nil).Debug("pgEventSource connected")
+               ps.Logger.Debug("pgEventSource connected")
                return
        }
 
        // Until we have a mechanism for catching up on missed events,
        // we cannot recover from a dropped connection without
        // breaking our promises to clients.
-       logger(nil).
+       ps.Logger.
                WithField("eventType", et).
                WithError(err).
                Error("listener problem")
@@ -59,6 +60,95 @@ func (ps *pgEventSource) listenerProblem(et pq.ListenerEventType, err error) {
 
+// setup creates the ready channel, registers the event source's
+// metrics (queue, sinks, event counters, database connections) with
+// ps.Reg, and starts a goroutine that refreshes the database
+// connection gauges once per second after ps.ready fires.
 func (ps *pgEventSource) setup() {
        ps.ready = make(chan bool)
+       ps.Reg.MustRegister(prometheus.NewGaugeFunc(
+               prometheus.GaugeOpts{
+                       Namespace: "arvados",
+                       Subsystem: "ws",
+                       Name:      "queue_len",
+                       Help:      "Current number of events in queue",
+               }, func() float64 { return float64(len(ps.queue)) }))
+       ps.Reg.MustRegister(prometheus.NewGaugeFunc(
+               prometheus.GaugeOpts{
+                       Namespace: "arvados",
+                       Subsystem: "ws",
+                       Name:      "queue_cap",
+                       Help:      "Event queue capacity",
+               }, func() float64 { return float64(cap(ps.queue)) }))
+       ps.Reg.MustRegister(prometheus.NewGaugeFunc(
+               prometheus.GaugeOpts{
+                       Namespace: "arvados",
+                       Subsystem: "ws",
+                       Name:      "queue_delay",
+                       Help:      "Queue delay of the last emitted event",
+               }, func() float64 { return ps.lastQDelay.Seconds() }))
+       ps.Reg.MustRegister(prometheus.NewGaugeFunc(
+               prometheus.GaugeOpts{
+                       Namespace: "arvados",
+                       Subsystem: "ws",
+                       Name:      "sinks",
+                       Help:      "Number of active sinks (connections)",
+               }, func() float64 {
+                       // The sinks_blocked gauge below takes ps.mtx
+                       // before touching ps.sinks; take the same
+                       // lock here so this read cannot race with
+                       // updates to the map.
+                       ps.mtx.Lock()
+                       defer ps.mtx.Unlock()
+                       return float64(len(ps.sinks))
+               }))
+       ps.Reg.MustRegister(prometheus.NewGaugeFunc(
+               prometheus.GaugeOpts{
+                       Namespace: "arvados",
+                       Subsystem: "ws",
+                       Name:      "sinks_blocked",
+                       Help:      "Number of sinks (connections) that are busy and blocking the main event stream",
+               }, func() float64 {
+                       ps.mtx.Lock()
+                       defer ps.mtx.Unlock()
+                       // Sum of the events currently buffered in
+                       // each sink's channel.
+                       blocked := 0
+                       for sink := range ps.sinks {
+                               blocked += len(sink.channel)
+                       }
+                       return float64(blocked)
+               }))
+       ps.eventsIn = prometheus.NewCounter(prometheus.CounterOpts{
+               Namespace: "arvados",
+               Subsystem: "ws",
+               Name:      "events_in",
+               Help:      "Number of events received from postgresql notify channel",
+       })
+       ps.Reg.MustRegister(ps.eventsIn)
+       ps.eventsOut = prometheus.NewCounter(prometheus.CounterOpts{
+               Namespace: "arvados",
+               Subsystem: "ws",
+               Name:      "events_out",
+               Help:      "Number of events sent to client sessions (before filtering)",
+       })
+       ps.Reg.MustRegister(ps.eventsOut)
+
+       maxConnections := prometheus.NewGauge(prometheus.GaugeOpts{
+               Namespace: "arvados",
+               Subsystem: "ws",
+               Name:      "db_max_connections",
+               Help:      "Maximum number of open connections to the database",
+       })
+       ps.Reg.MustRegister(maxConnections)
+       openConnections := prometheus.NewGaugeVec(prometheus.GaugeOpts{
+               Namespace: "arvados",
+               Subsystem: "ws",
+               Name:      "db_open_connections",
+               Help:      "Open connections to the database",
+       }, []string{"inuse"})
+       ps.Reg.MustRegister(openConnections)
+
+       // updateDBStats copies the connection pool statistics into
+       // the gauges: inuse="0" is idle, inuse="1" is in use.
+       updateDBStats := func() {
+               stats := ps.db.Stats()
+               maxConnections.Set(float64(stats.MaxOpenConnections))
+               openConnections.WithLabelValues("0").Set(float64(stats.Idle))
+               openConnections.WithLabelValues("1").Set(float64(stats.InUse))
+       }
+       go func() {
+               // Wait until ps.ready fires; if no database handle
+               // has been set by then, there is nothing to report.
+               <-ps.ready
+               if ps.db == nil {
+                       return
+               }
+               updateDBStats()
+               // NOTE(review): this loop has no exit condition, so
+               // the goroutine and its ticker run until the process
+               // ends.
+               for range time.Tick(time.Second) {
+                       updateDBStats()
+               }
+       }()
 }
 
 // Close stops listening for new events and disconnects all clients.
@@ -76,8 +166,8 @@ func (ps *pgEventSource) WaitReady() {
 // Run listens for event notifications on the "logs" channel and sends
 // them to all subscribers.
 func (ps *pgEventSource) Run() {
-       logger(nil).Debug("pgEventSource Run starting")
-       defer logger(nil).Debug("pgEventSource Run finished")
+       ps.Logger.Debug("pgEventSource Run starting")
+       defer ps.Logger.Debug("pgEventSource Run finished")
 
        ps.setupOnce.Do(ps.setup)
        ready := ps.ready
@@ -103,15 +193,15 @@ func (ps *pgEventSource) Run() {
 
        db, err := sql.Open("postgres", ps.DataSource)
        if err != nil {
-               logger(nil).WithError(err).Error("sql.Open failed")
+               ps.Logger.WithError(err).Error("sql.Open failed")
                return
        }
        if ps.MaxOpenConns <= 0 {
-               logger(nil).Warn("no database connection limit configured -- consider setting PostgresPool>0 in arvados-ws configuration file")
+               ps.Logger.Warn("no database connection limit configured -- consider setting PostgreSQL.ConnectionPool>0 in arvados-ws configuration file")
        }
        db.SetMaxOpenConns(ps.MaxOpenConns)
        if err = db.Ping(); err != nil {
-               logger(nil).WithError(err).Error("db.Ping failed")
+               ps.Logger.WithError(err).Error("db.Ping failed")
                return
        }
        ps.db = db
@@ -119,11 +209,11 @@ func (ps *pgEventSource) Run() {
        ps.pqListener = pq.NewListener(ps.DataSource, time.Second, time.Minute, ps.listenerProblem)
        err = ps.pqListener.Listen("logs")
        if err != nil {
-               logger(nil).WithError(err).Error("pq Listen failed")
+               ps.Logger.WithError(err).Error("pq Listen failed")
                return
        }
        defer ps.pqListener.Close()
-       logger(nil).Debug("pq Listen setup done")
+       ps.Logger.Debug("pq Listen setup done")
 
        close(ready)
        // Avoid double-close in deferred func
@@ -141,7 +231,7 @@ func (ps *pgEventSource) Run() {
                        // client_count X client_queue_size.
                        e.Detail()
 
-                       logger(nil).
+                       ps.Logger.
                                WithField("serial", e.Serial).
                                WithField("detail", e.Detail()).
                                Debug("event ready")
@@ -149,9 +239,9 @@ func (ps *pgEventSource) Run() {
                        ps.lastQDelay = e.Ready.Sub(e.Received)
 
                        ps.mtx.Lock()
-                       atomic.AddUint64(&ps.eventsOut, uint64(len(ps.sinks)))
                        for sink := range ps.sinks {
                                sink.channel <- e
+                               ps.eventsOut.Inc()
                        }
                        ps.mtx.Unlock()
                }
@@ -163,11 +253,11 @@ func (ps *pgEventSource) Run() {
        for {
                select {
                case <-ctx.Done():
-                       logger(nil).Debug("ctx done")
+                       ps.Logger.Debug("ctx done")
                        return
 
                case <-ticker.C:
-                       logger(nil).Debug("listener ping")
+                       ps.Logger.Debug("listener ping")
                        err := ps.pqListener.Ping()
                        if err != nil {
                                ps.listenerProblem(-1, fmt.Errorf("pqListener ping failed: %s", err))
@@ -176,7 +266,7 @@ func (ps *pgEventSource) Run() {
 
                case pqEvent, ok := <-ps.pqListener.Notify:
                        if !ok {
-                               logger(nil).Error("pqListener Notify chan closed")
+                               ps.Logger.Error("pqListener Notify chan closed")
                                return
                        }
                        if pqEvent == nil {
@@ -188,12 +278,12 @@ func (ps *pgEventSource) Run() {
                                continue
                        }
                        if pqEvent.Channel != "logs" {
-                               logger(nil).WithField("pqEvent", pqEvent).Error("unexpected notify from wrong channel")
+                               ps.Logger.WithField("pqEvent", pqEvent).Error("unexpected notify from wrong channel")
                                continue
                        }
                        logID, err := strconv.ParseUint(pqEvent.Extra, 10, 64)
                        if err != nil {
-                               logger(nil).WithField("pqEvent", pqEvent).Error("bad notify payload")
+                               ps.Logger.WithField("pqEvent", pqEvent).Error("bad notify payload")
                                continue
                        }
                        serial++
@@ -202,9 +292,10 @@ func (ps *pgEventSource) Run() {
                                Received: time.Now(),
                                Serial:   serial,
                                db:       ps.db,
+                               logger:   ps.Logger,
                        }
-                       logger(nil).WithField("event", e).Debug("incoming")
-                       atomic.AddUint64(&ps.eventsIn, 1)
+                       ps.Logger.WithField("event", e).Debug("incoming")
+                       ps.eventsIn.Inc()
                        ps.queue <- e
                        go e.Detail()
                }
@@ -238,6 +329,9 @@ func (ps *pgEventSource) DB() *sql.DB {
 }
 
 func (ps *pgEventSource) DBHealth() error {
+       if ps.db == nil {
+               return errors.New("database not connected")
+       }
        ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(time.Second))
        defer cancel()
        var i int
@@ -252,8 +346,6 @@ func (ps *pgEventSource) DebugStatus() interface{} {
                blocked += len(sink.channel)
        }
        return map[string]interface{}{
-               "EventsIn":     atomic.LoadUint64(&ps.eventsIn),
-               "EventsOut":    atomic.LoadUint64(&ps.eventsOut),
                "Queue":        len(ps.queue),
                "QueueLimit":   cap(ps.queue),
                "QueueDelay":   stats.Duration(ps.lastQDelay),
index 98a9e8b9785b40dbd8f5314bcedb98bd083efe44..b7b8ac3006f3fa6af19de31737af82129dbf8642 100644 (file)
@@ -2,17 +2,17 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package main
+package ws
 
 import (
        "database/sql"
        "fmt"
-       "os"
-       "path/filepath"
        "sync"
        "time"
 
        "git.arvados.org/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/sdk/go/ctxlog"
+       "github.com/prometheus/client_golang/prometheus"
        check "gopkg.in/check.v1"
 )
 
@@ -21,7 +21,7 @@ var _ = check.Suite(&eventSourceSuite{})
 type eventSourceSuite struct{}
 
 func testDBConfig() arvados.PostgreSQLConnection {
-       cfg, err := arvados.GetConfig(filepath.Join(os.Getenv("WORKSPACE"), "tmp", "arvados.yml"))
+       cfg, err := arvados.GetConfig(arvados.DefaultConfigFile)
        if err != nil {
                panic(err)
        }
@@ -46,6 +46,8 @@ func (*eventSourceSuite) TestEventSource(c *check.C) {
        pges := &pgEventSource{
                DataSource: cfg.String(),
                QueueSize:  4,
+               Logger:     ctxlog.TestLogger(c),
+               Reg:        prometheus.NewRegistry(),
        }
        go pges.Run()
        sinks := make([]eventSink, 18)
index dc324464ec3d15f4b473b5d9b91f3557c7a90abd..4665dfcd9ee9208fcb71794189ba115d0285fa55 100644 (file)
@@ -2,7 +2,7 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package main
+package ws
 
 import check "gopkg.in/check.v1"
 
index ea8dfc30c94c94e19308192c8c6713f745ce3a9b..df1ca7ab31c292280ab8a72c2f56155ef4c68e84 100644 (file)
@@ -2,7 +2,7 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package main
+package ws
 
 import (
        "testing"
@@ -13,3 +13,7 @@ import (
 func TestGocheck(t *testing.T) {
        check.TestingT(t)
 }
+
+// init flags the package as running under test (sets testMode) before
+// any suite runs.
+func init() {
+       testMode = true
+}
index 913b1ee8000cbd274039483df70bad7896d52df5..912643ad97c6374006b3fd4b00f90d340157d687 100644 (file)
@@ -2,7 +2,7 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package main
+package ws
 
 import (
        "context"
@@ -12,6 +12,7 @@ import (
 
        "git.arvados.org/arvados.git/sdk/go/arvados"
        "git.arvados.org/arvados.git/sdk/go/stats"
+       "github.com/sirupsen/logrus"
 )
 
 type handler struct {
@@ -31,12 +32,11 @@ type handlerStats struct {
        EventCount   uint64
 }
 
-func (h *handler) Handle(ws wsConn, eventSource eventSource, newSession func(wsConn, chan<- interface{}) (session, error)) (hStats handlerStats) {
+func (h *handler) Handle(ws wsConn, logger logrus.FieldLogger, eventSource eventSource, newSession func(wsConn, chan<- interface{}) (session, error)) (hStats handlerStats) {
        h.setupOnce.Do(h.setup)
 
        ctx, cancel := context.WithCancel(ws.Request().Context())
        defer cancel()
-       log := logger(ctx)
 
        incoming := eventSource.NewSink()
        defer incoming.Stop()
@@ -53,7 +53,7 @@ func (h *handler) Handle(ws wsConn, eventSource eventSource, newSession func(wsC
 
        sess, err := newSession(ws, queue)
        if err != nil {
-               log.WithError(err).Error("newSession failed")
+               logger.WithError(err).Error("newSession failed")
                return
        }
 
@@ -71,19 +71,19 @@ func (h *handler) Handle(ws wsConn, eventSource eventSource, newSession func(wsC
                        ws.SetReadDeadline(time.Now().Add(24 * 365 * time.Hour))
                        n, err := ws.Read(buf)
                        buf := buf[:n]
-                       log.WithField("frame", string(buf[:n])).Debug("received frame")
+                       logger.WithField("frame", string(buf[:n])).Debug("received frame")
                        if err == nil && n == cap(buf) {
                                err = errFrameTooBig
                        }
                        if err != nil {
                                if err != io.EOF && ctx.Err() == nil {
-                                       log.WithError(err).Info("read error")
+                                       logger.WithError(err).Info("read error")
                                }
                                return
                        }
                        err = sess.Receive(buf)
                        if err != nil {
-                               log.WithError(err).Error("sess.Receive() failed")
+                               logger.WithError(err).Error("sess.Receive() failed")
                                return
                        }
                }
@@ -108,38 +108,38 @@ func (h *handler) Handle(ws wsConn, eventSource eventSource, newSession func(wsC
                        var e *event
                        var buf []byte
                        var err error
-                       log := log
+                       logger := logger
 
                        switch data := data.(type) {
                        case []byte:
                                buf = data
                        case *event:
                                e = data
-                               log = log.WithField("serial", e.Serial)
+                               logger = logger.WithField("serial", e.Serial)
                                buf, err = sess.EventMessage(e)
                                if err != nil {
-                                       log.WithError(err).Error("EventMessage failed")
+                                       logger.WithError(err).Error("EventMessage failed")
                                        return
                                } else if len(buf) == 0 {
-                                       log.Debug("skip")
+                                       logger.Debug("skip")
                                        continue
                                }
                        default:
-                               log.WithField("data", data).Error("bad object in client queue")
+                               logger.WithField("data", data).Error("bad object in client queue")
                                continue
                        }
 
-                       log.WithField("frame", string(buf)).Debug("send event")
+                       logger.WithField("frame", string(buf)).Debug("send event")
                        ws.SetWriteDeadline(time.Now().Add(h.PingTimeout))
                        t0 := time.Now()
                        _, err = ws.Write(buf)
                        if err != nil {
                                if ctx.Err() == nil {
-                                       log.WithError(err).Error("write failed")
+                                       logger.WithError(err).Error("write failed")
                                }
                                return
                        }
-                       log.Debug("sent")
+                       logger.Debug("sent")
 
                        if e != nil {
                                hStats.QueueDelayNs += t0.Sub(e.Ready)
@@ -189,7 +189,7 @@ func (h *handler) Handle(ws wsConn, eventSource eventSource, newSession func(wsC
                                select {
                                case queue <- e:
                                default:
-                                       log.WithError(errQueueFull).Error("terminate")
+                                       logger.WithError(errQueueFull).Error("terminate")
                                        return
                                }
                        }
diff --git a/services/ws/main.go b/services/ws/main.go
deleted file mode 100644 (file)
index 5b42c44..0000000
+++ /dev/null
@@ -1,77 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package main
-
-import (
-       "flag"
-       "fmt"
-       "os"
-
-       "git.arvados.org/arvados.git/lib/config"
-       "git.arvados.org/arvados.git/sdk/go/arvados"
-       "git.arvados.org/arvados.git/sdk/go/ctxlog"
-       "github.com/ghodss/yaml"
-       "github.com/sirupsen/logrus"
-)
-
-var logger = ctxlog.FromContext
-var version = "dev"
-
-func configure(log logrus.FieldLogger, args []string) *arvados.Cluster {
-       flags := flag.NewFlagSet(args[0], flag.ExitOnError)
-       dumpConfig := flags.Bool("dump-config", false, "show current configuration and exit")
-       getVersion := flags.Bool("version", false, "Print version information and exit.")
-
-       loader := config.NewLoader(nil, log)
-       loader.SetupFlags(flags)
-       args = loader.MungeLegacyConfigArgs(log, args[1:], "-legacy-ws-config")
-
-       flags.Parse(args)
-
-       // Print version information if requested
-       if *getVersion {
-               fmt.Printf("arvados-ws %s\n", version)
-               return nil
-       }
-
-       cfg, err := loader.Load()
-       if err != nil {
-               log.Fatal(err)
-       }
-
-       cluster, err := cfg.GetCluster("")
-       if err != nil {
-               log.Fatal(err)
-       }
-
-       ctxlog.SetLevel(cluster.SystemLogs.LogLevel)
-       ctxlog.SetFormat(cluster.SystemLogs.Format)
-
-       if *dumpConfig {
-               out, err := yaml.Marshal(cfg)
-               if err != nil {
-                       log.Fatal(err)
-               }
-               _, err = os.Stdout.Write(out)
-               if err != nil {
-                       log.Fatal(err)
-               }
-               return nil
-       }
-       return cluster
-}
-
-func main() {
-       log := logger(nil)
-
-       cluster := configure(log, os.Args)
-       if cluster == nil {
-               return
-       }
-
-       log.Printf("arvados-ws %s started", version)
-       srv := &server{cluster: cluster}
-       log.Fatal(srv.Run())
-}
index 745d28f9523f36ca83afa0b29e9511e6f98176f9..ac895f80e5fd7ae7933558fbfa6e6acb97a6c7b0 100644 (file)
@@ -2,14 +2,16 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package main
+package ws
 
 import (
+       "context"
        "net/http"
        "net/url"
        "time"
 
        "git.arvados.org/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/sdk/go/ctxlog"
 )
 
 const (
@@ -19,7 +21,7 @@ const (
 
 type permChecker interface {
        SetToken(token string)
-       Check(uuid string) (bool, error)
+       // Check takes the caller's context so implementations can use
+       // its logger (cachingPermChecker does, via ctxlog.FromContext).
+       Check(ctx context.Context, uuid string) (bool, error)
 }
 
 func newPermChecker(ac arvados.Client) permChecker {
@@ -54,9 +56,9 @@ func (pc *cachingPermChecker) SetToken(token string) {
        pc.cache = make(map[string]cacheEnt)
 }
 
-func (pc *cachingPermChecker) Check(uuid string) (bool, error) {
+func (pc *cachingPermChecker) Check(ctx context.Context, uuid string) (bool, error) {
        pc.nChecks++
-       logger := logger(nil).
+       logger := ctxlog.FromContext(ctx).
                WithField("token", pc.Client.AuthToken).
                WithField("uuid", uuid)
        pc.tidy()
index 5f972551ffe8ffeaa4e11ec81573ae46425591d3..023656c01fd93dc3a912283682ffc9eda59c7e6b 100644 (file)
@@ -2,9 +2,11 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package main
+package ws
 
 import (
+       "context"
+
        "git.arvados.org/arvados.git/sdk/go/arvados"
        "git.arvados.org/arvados.git/sdk/go/arvadostest"
        check "gopkg.in/check.v1"
@@ -22,19 +24,19 @@ func (s *permSuite) TestCheck(c *check.C) {
        }
        wantError := func(uuid string) {
                c.Log(uuid)
-               ok, err := pc.Check(uuid)
+               ok, err := pc.Check(context.Background(), uuid)
                c.Check(ok, check.Equals, false)
                c.Check(err, check.NotNil)
        }
        wantYes := func(uuid string) {
                c.Log(uuid)
-               ok, err := pc.Check(uuid)
+               ok, err := pc.Check(context.Background(), uuid)
                c.Check(ok, check.Equals, true)
                c.Check(err, check.IsNil)
        }
        wantNo := func(uuid string) {
                c.Log(uuid)
-               ok, err := pc.Check(uuid)
+               ok, err := pc.Check(context.Background(), uuid)
                c.Check(ok, check.Equals, false)
                c.Check(err, check.IsNil)
        }
@@ -67,7 +69,7 @@ func (s *permSuite) TestCheck(c *check.C) {
        pc.SetToken(arvadostest.ActiveToken)
 
        c.Log("...network error")
-       pc.Client.APIHost = "127.0.0.1:discard"
+       pc.Client.APIHost = "127.0.0.1:9"
        wantError(arvadostest.UserAgreementCollection)
        wantError(arvadostest.FooBarDirCollection)
 
index f8c273c5141b6f76f73b28c3c2c5d995f0df94dd..878c282f8a6c57f17192b777faba760485757b86 100644 (file)
@@ -2,13 +2,11 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package main
+package ws
 
 import (
-       "encoding/json"
        "io"
        "net/http"
-       "strconv"
        "sync"
        "sync/atomic"
        "time"
@@ -16,6 +14,7 @@ import (
        "git.arvados.org/arvados.git/sdk/go/arvados"
        "git.arvados.org/arvados.git/sdk/go/ctxlog"
        "git.arvados.org/arvados.git/sdk/go/health"
+       "github.com/prometheus/client_golang/prometheus"
        "github.com/sirupsen/logrus"
        "golang.org/x/net/websocket"
 )
@@ -28,7 +27,7 @@ type wsConn interface {
 }
 
 type router struct {
-       client         arvados.Client
+       client         *arvados.Client
        cluster        *arvados.Cluster
        eventSource    eventSource
        newPermChecker func() permChecker
@@ -36,33 +35,26 @@ type router struct {
        handler   *handler
        mux       *http.ServeMux
        setupOnce sync.Once
-
-       lastReqID  int64
-       lastReqMtx sync.Mutex
-
-       status routerDebugStatus
-}
-
-type routerDebugStatus struct {
-       ReqsReceived int64
-       ReqsActive   int64
-}
-
-type debugStatuser interface {
-       DebugStatus() interface{}
+       done      chan struct{}
+       reg       *prometheus.Registry
 }
 
 func (rtr *router) setup() {
+       mSockets := prometheus.NewGaugeVec(prometheus.GaugeOpts{
+               Namespace: "arvados",
+               Subsystem: "ws",
+               Name:      "sockets",
+               Help:      "Number of connected sockets",
+       }, []string{"version"})
+       rtr.reg.MustRegister(mSockets)
+
        rtr.handler = &handler{
                PingTimeout: time.Duration(rtr.cluster.API.SendTimeout),
                QueueSize:   rtr.cluster.API.WebsocketClientEventQueue,
        }
        rtr.mux = http.NewServeMux()
-       rtr.mux.Handle("/websocket", rtr.makeServer(newSessionV0))
-       rtr.mux.Handle("/arvados/v1/events.ws", rtr.makeServer(newSessionV1))
-       rtr.mux.Handle("/debug.json", rtr.jsonHandler(rtr.DebugStatus))
-       rtr.mux.Handle("/status.json", rtr.jsonHandler(rtr.Status))
-
+       rtr.mux.Handle("/websocket", rtr.makeServer(newSessionV0, mSockets.WithLabelValues("0")))
+       rtr.mux.Handle("/arvados/v1/events.ws", rtr.makeServer(newSessionV1, mSockets.WithLabelValues("1")))
        rtr.mux.Handle("/_health/", &health.Handler{
                Token:  rtr.cluster.ManagementToken,
                Prefix: "/_health/",
@@ -71,91 +63,50 @@ func (rtr *router) setup() {
                },
                Log: func(r *http.Request, err error) {
                        if err != nil {
-                               logger(r.Context()).WithError(err).Error("error")
+                               ctxlog.FromContext(r.Context()).WithError(err).Error("error")
                        }
                },
        })
 }
 
-func (rtr *router) makeServer(newSession sessionFactory) *websocket.Server {
+// makeServer returns a websocket server that runs each accepted
+// connection through rtr.handler, with sessions built by newSession,
+// and keeps gauge equal to the number of connections this server is
+// currently handling.
+func (rtr *router) makeServer(newSession sessionFactory, gauge prometheus.Gauge) *websocket.Server {
+       var connected int64
        return &websocket.Server{
                Handshake: func(c *websocket.Config, r *http.Request) error {
                        return nil
                },
                Handler: websocket.Handler(func(ws *websocket.Conn) {
                        t0 := time.Now()
-                       log := logger(ws.Request().Context())
-                       log.Info("connected")
+                       logger := ctxlog.FromContext(ws.Request().Context())
+                       // Count this socket, and uncount it on every exit
+                       // path (including a panic in Handle) so the gauge
+                       // cannot be left permanently inflated.
+                       gauge.Set(float64(atomic.AddInt64(&connected, 1)))
+                       defer func() {
+                               gauge.Set(float64(atomic.AddInt64(&connected, -1)))
+                       }()
 
-                       stats := rtr.handler.Handle(ws, rtr.eventSource,
+                       stats := rtr.handler.Handle(ws, logger, rtr.eventSource,
                                func(ws wsConn, sendq chan<- interface{}) (session, error) {
-                                       return newSession(ws, sendq, rtr.eventSource.DB(), rtr.newPermChecker(), &rtr.client)
+                                       return newSession(ws, sendq, rtr.eventSource.DB(), rtr.newPermChecker(), rtr.client)
                                })
 
-                       log.WithFields(logrus.Fields{
+                       logger.WithFields(logrus.Fields{
                                "elapsed": time.Now().Sub(t0).Seconds(),
                                "stats":   stats,
-                       }).Info("disconnect")
+                       }).Info("client disconnected")
                        ws.Close()
                }),
        }
 }
 
-func (rtr *router) newReqID() string {
-       rtr.lastReqMtx.Lock()
-       defer rtr.lastReqMtx.Unlock()
-       id := time.Now().UnixNano()
-       if id <= rtr.lastReqID {
-               id = rtr.lastReqID + 1
-       }
-       return strconv.FormatInt(id, 36)
-}
-
-func (rtr *router) DebugStatus() interface{} {
-       s := map[string]interface{}{
-               "HTTP":     rtr.status,
-               "Outgoing": rtr.handler.DebugStatus(),
-       }
-       if es, ok := rtr.eventSource.(debugStatuser); ok {
-               s["EventSource"] = es.DebugStatus()
-       }
-       return s
-}
-
-func (rtr *router) Status() interface{} {
-       return map[string]interface{}{
-               "Clients": atomic.LoadInt64(&rtr.status.ReqsActive),
-               "Version": version,
-       }
-}
-
+// ServeHTTP runs the router's one-time setup if it has not happened
+// yet, then dispatches the request to rtr.mux.
 func (rtr *router) ServeHTTP(resp http.ResponseWriter, req *http.Request) {
        rtr.setupOnce.Do(rtr.setup)
-       atomic.AddInt64(&rtr.status.ReqsReceived, 1)
-       atomic.AddInt64(&rtr.status.ReqsActive, 1)
-       defer atomic.AddInt64(&rtr.status.ReqsActive, -1)
-
-       logger := logger(req.Context()).
-               WithField("RequestID", rtr.newReqID())
-       ctx := ctxlog.Context(req.Context(), logger)
-       req = req.WithContext(ctx)
-       logger.WithFields(logrus.Fields{
-               "remoteAddr":      req.RemoteAddr,
-               "reqForwardedFor": req.Header.Get("X-Forwarded-For"),
-       }).Info("accept request")
        rtr.mux.ServeHTTP(resp, req)
 }
 
-func (rtr *router) jsonHandler(fn func() interface{}) http.Handler {
-       return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
-               logger := logger(r.Context())
-               w.Header().Set("Content-Type", "application/json")
-               enc := json.NewEncoder(w)
-               err := enc.Encode(fn())
-               if err != nil {
-                       msg := "encode failed"
-                       logger.WithError(err).Error(msg)
-                       http.Error(w, msg, http.StatusInternalServerError)
-               }
-       })
+// CheckHealth runs the router's one-time setup if needed and returns
+// the result of the event source's DBHealth check.
+func (rtr *router) CheckHealth() error {
+       rtr.setupOnce.Do(rtr.setup)
+       return rtr.eventSource.DBHealth()
+}
+
+// Done returns rtr.done. newHandler supplies that channel and closes
+// it once the event source's Run returns.
+func (rtr *router) Done() <-chan struct{} {
+       return rtr.done
 }
diff --git a/services/ws/server.go b/services/ws/server.go
deleted file mode 100644 (file)
index 9747ea1..0000000
+++ /dev/null
@@ -1,89 +0,0 @@
-// Copyright (C) The Arvados Authors. All rights reserved.
-//
-// SPDX-License-Identifier: AGPL-3.0
-
-package main
-
-import (
-       "net"
-       "net/http"
-       "sync"
-       "time"
-
-       "git.arvados.org/arvados.git/sdk/go/arvados"
-       "github.com/coreos/go-systemd/daemon"
-)
-
-type server struct {
-       httpServer  *http.Server
-       listener    net.Listener
-       cluster     *arvados.Cluster
-       eventSource *pgEventSource
-       setupOnce   sync.Once
-}
-
-func (srv *server) Close() {
-       srv.WaitReady()
-       srv.eventSource.Close()
-       srv.httpServer.Close()
-       srv.listener.Close()
-}
-
-func (srv *server) WaitReady() {
-       srv.setupOnce.Do(srv.setup)
-       srv.eventSource.WaitReady()
-}
-
-func (srv *server) Run() error {
-       srv.setupOnce.Do(srv.setup)
-       return srv.httpServer.Serve(srv.listener)
-}
-
-func (srv *server) setup() {
-       log := logger(nil)
-
-       var listen arvados.URL
-       for listen, _ = range srv.cluster.Services.Websocket.InternalURLs {
-               break
-       }
-       ln, err := net.Listen("tcp", listen.Host)
-       if err != nil {
-               log.WithField("Listen", listen).Fatal(err)
-       }
-       log.WithField("Listen", ln.Addr().String()).Info("listening")
-
-       client := arvados.Client{}
-       client.APIHost = srv.cluster.Services.Controller.ExternalURL.Host
-       client.AuthToken = srv.cluster.SystemRootToken
-       client.Insecure = srv.cluster.TLS.Insecure
-
-       srv.listener = ln
-       srv.eventSource = &pgEventSource{
-               DataSource:   srv.cluster.PostgreSQL.Connection.String(),
-               MaxOpenConns: srv.cluster.PostgreSQL.ConnectionPool,
-               QueueSize:    srv.cluster.API.WebsocketServerEventQueue,
-       }
-
-       srv.httpServer = &http.Server{
-               Addr:           listen.Host,
-               ReadTimeout:    time.Minute,
-               WriteTimeout:   time.Minute,
-               MaxHeaderBytes: 1 << 20,
-               Handler: &router{
-                       cluster:        srv.cluster,
-                       client:         client,
-                       eventSource:    srv.eventSource,
-                       newPermChecker: func() permChecker { return newPermChecker(client) },
-               },
-       }
-
-       go func() {
-               srv.eventSource.Run()
-               log.Info("event source stopped")
-               srv.Close()
-       }()
-
-       if _, err := daemon.SdNotify(false, "READY=1"); err != nil {
-               log.WithError(err).Warn("error notifying init daemon")
-       }
-}
diff --git a/services/ws/service.go b/services/ws/service.go
new file mode 100644 (file)
index 0000000..761e22e
--- /dev/null
@@ -0,0 +1,53 @@
+// Copyright (C) The Arvados Authors. All rights reserved.
+//
+// SPDX-License-Identifier: AGPL-3.0
+
+package ws
+
+import (
+       "context"
+       "fmt"
+
+       "git.arvados.org/arvados.git/lib/cmd"
+       "git.arvados.org/arvados.git/lib/service"
+       "git.arvados.org/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/sdk/go/ctxlog"
+       "github.com/prometheus/client_golang/prometheus"
+)
+
+// testMode is switched on by the package's test init().
+// NOTE(review): its consumers are not in this file -- confirm what it gates.
+var testMode = false
+
+// Command is the cmd.Handler for the websocket service, built by
+// service.Command from newHandler.
+var Command cmd.Handler = service.Command(arvados.ServiceNameWebsocket, newHandler)
+
+// newHandler builds the service.Handler for the websocket service.
+//
+// It creates an API client from the cluster config, starts a
+// pgEventSource in its own goroutine, calls WaitReady on it, and then
+// runs DBHealth. If the client cannot be created or DBHealth fails,
+// the returned handler is a service.ErrorHandler carrying that error;
+// otherwise it is a router wired to the event source. The router's
+// done channel is closed after the event source's Run returns.
+func newHandler(ctx context.Context, cluster *arvados.Cluster, token string, reg *prometheus.Registry) service.Handler {
+       logger := ctxlog.FromContext(ctx)
+
+       client, err := arvados.NewClientFromConfig(cluster)
+       if err != nil {
+               err = fmt.Errorf("error initializing client from cluster config: %s", err)
+               return service.ErrorHandler(ctx, cluster, err)
+       }
+
+       src := &pgEventSource{
+               DataSource:   cluster.PostgreSQL.Connection.String(),
+               MaxOpenConns: cluster.PostgreSQL.ConnectionPool,
+               QueueSize:    cluster.API.WebsocketServerEventQueue,
+               Logger:       logger,
+               Reg:          reg,
+       }
+
+       // stopped is handed to the router as its done channel.
+       stopped := make(chan struct{})
+       go func() {
+               src.Run()
+               logger.Error("event source stopped")
+               close(stopped)
+       }()
+
+       src.WaitReady()
+       if err = src.DBHealth(); err != nil {
+               return service.ErrorHandler(ctx, cluster, err)
+       }
+
+       return &router{
+               cluster:        cluster,
+               client:         client,
+               eventSource:    src,
+               newPermChecker: func() permChecker { return newPermChecker(*client) },
+               done:           stopped,
+               reg:            reg,
+       }
+}
similarity index 55%
rename from services/ws/server_test.go
rename to services/ws/service_test.go
index 88279ec9b2de83cd28bc191815bd1fa274cfec80..7213dcad2a9ddbb967991d70a2f9b094ce317b98 100644 (file)
@@ -2,39 +2,61 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package main
+package ws
 
 import (
-       "encoding/json"
+       "bytes"
+       "context"
+       "flag"
        "io/ioutil"
        "net/http"
+       "net/http/httptest"
        "os"
+       "strings"
        "sync"
        "time"
 
        "git.arvados.org/arvados.git/lib/config"
+       "git.arvados.org/arvados.git/lib/service"
        "git.arvados.org/arvados.git/sdk/go/arvados"
        "git.arvados.org/arvados.git/sdk/go/arvadostest"
        "git.arvados.org/arvados.git/sdk/go/ctxlog"
+       "git.arvados.org/arvados.git/sdk/go/httpserver"
+       "github.com/prometheus/client_golang/prometheus"
+       "github.com/sirupsen/logrus"
        check "gopkg.in/check.v1"
 )
 
-var _ = check.Suite(&serverSuite{})
+var _ = check.Suite(&serviceSuite{})
 
-type serverSuite struct {
+type serviceSuite struct {
+       handler service.Handler      // created by start()
+       reg     *prometheus.Registry // fresh registry created by start()
+       srv     *httptest.Server     // created by start(); closed in TearDownTest
        cluster *arvados.Cluster
-       srv     *server
        wg      sync.WaitGroup
 }
 
-func (s *serverSuite) SetUpTest(c *check.C) {
+// SetUpTest loads a cluster config via testConfig and aborts the test
+// if that fails. It does not start a server.
+func (s *serviceSuite) SetUpTest(c *check.C) {
        var err error
        s.cluster, err = s.testConfig(c)
        c.Assert(err, check.IsNil)
-       s.srv = &server{cluster: s.cluster}
 }
 
-func (*serverSuite) testConfig(c *check.C) (*arvados.Cluster, error) {
+// start creates a fresh prometheus registry and a handler for
+// s.cluster, wraps the handler with httpserver.Instrument, and serves
+// it from a new httptest server stored in s.srv.
+func (s *serviceSuite) start(c *check.C) {
+       s.reg = prometheus.NewRegistry()
+       s.handler = newHandler(context.Background(), s.cluster, "", s.reg)
+       instrumented := httpserver.Instrument(s.reg, ctxlog.TestLogger(c), s.handler)
+       s.srv = httptest.NewServer(instrumented.ServeAPI(s.cluster.ManagementToken, instrumented))
+}
+
+// TearDownTest closes the test server if s.srv is set.
+func (s *serviceSuite) TearDownTest(c *check.C) {
+       if s.srv != nil {
+               s.srv.Close()
+       }
+}
+
+func (*serviceSuite) testConfig(c *check.C) (*arvados.Cluster, error) {
        ldr := config.NewLoader(nil, ctxlog.TestLogger(c))
        cfg, err := ldr.Load()
        if err != nil {
@@ -49,47 +71,30 @@ func (*serverSuite) testConfig(c *check.C) (*arvados.Cluster, error) {
        cluster.SystemRootToken = client.AuthToken
        cluster.TLS.Insecure = client.Insecure
        cluster.PostgreSQL.Connection = testDBConfig()
+       cluster.PostgreSQL.ConnectionPool = 12
        cluster.Services.Websocket.InternalURLs = map[arvados.URL]arvados.ServiceInstance{arvados.URL{Host: ":"}: arvados.ServiceInstance{}}
        cluster.ManagementToken = arvadostest.ManagementToken
        return cluster, nil
 }
 
-// TestBadDB ensures Run() returns an error (instead of panicking or
-// deadlocking) if it can't connect to the database server at startup.
-func (s *serverSuite) TestBadDB(c *check.C) {
+// TestBadDB ensures the server returns an error (instead of panicking
+// or deadlocking) if it can't connect to the database server at
+// startup.
+func (s *serviceSuite) TestBadDB(c *check.C) {
        s.cluster.PostgreSQL.Connection["password"] = "1234"
-
-       var wg sync.WaitGroup
-       wg.Add(1)
-       go func() {
-               err := s.srv.Run()
-               c.Check(err, check.NotNil)
-               wg.Done()
-       }()
-       wg.Add(1)
-       go func() {
-               s.srv.WaitReady()
-               wg.Done()
-       }()
-
-       done := make(chan bool)
-       go func() {
-               wg.Wait()
-               close(done)
-       }()
-       select {
-       case <-done:
-       case <-time.After(10 * time.Second):
-               c.Fatal("timeout")
-       }
+       s.start(c)
+       resp, err := http.Get(s.srv.URL)
+       // Assert (not Check): resp is nil if the request itself failed.
+       c.Assert(err, check.IsNil)
+       defer resp.Body.Close()
+       c.Check(resp.StatusCode, check.Equals, http.StatusInternalServerError)
+       c.Check(s.handler.CheckHealth(), check.ErrorMatches, "database not connected")
 }
 
-func (s *serverSuite) TestHealth(c *check.C) {
-       go s.srv.Run()
-       defer s.srv.Close()
-       s.srv.WaitReady()
+func (s *serviceSuite) TestHealth(c *check.C) {
+       s.start(c)
        for _, token := range []string{"", "foo", s.cluster.ManagementToken} {
-               req, err := http.NewRequest("GET", "http://"+s.srv.listener.Addr().String()+"/_health/ping", nil)
+               req, err := http.NewRequest("GET", s.srv.URL+"/_health/ping", nil)
                c.Assert(err, check.IsNil)
                if token != "" {
                        req.Header.Add("Authorization", "Bearer "+token)
@@ -107,30 +112,38 @@ func (s *serverSuite) TestHealth(c *check.C) {
        }
 }
 
-func (s *serverSuite) TestStatus(c *check.C) {
-       go s.srv.Run()
-       defer s.srv.Close()
-       s.srv.WaitReady()
-       req, err := http.NewRequest("GET", "http://"+s.srv.listener.Addr().String()+"/status.json", nil)
-       c.Assert(err, check.IsNil)
-       resp, err := http.DefaultClient.Do(req)
-       c.Check(err, check.IsNil)
-       c.Check(resp.StatusCode, check.Equals, http.StatusOK)
-       var status map[string]interface{}
-       err = json.NewDecoder(resp.Body).Decode(&status)
-       c.Check(err, check.IsNil)
-       c.Check(status["Version"], check.Not(check.Equals), "")
+// TestMetrics polls /metrics (authenticated with the management
+// token) until the first db stats update has been published, failing
+// if that takes longer than one second, then checks the reported
+// connection pool metrics.
+func (s *serviceSuite) TestMetrics(c *check.C) {
+       s.start(c)
+       s.handler.CheckHealth()
+       for deadline := time.Now().Add(time.Second); ; {
+               req, err := http.NewRequest("GET", s.srv.URL+"/metrics", nil)
+               c.Assert(err, check.IsNil)
+               req.Header.Set("Authorization", "Bearer "+s.cluster.ManagementToken)
+               resp, err := http.DefaultClient.Do(req)
+               // Assert (not Check): resp is nil if the request itself failed.
+               c.Assert(err, check.IsNil)
+               c.Check(resp.StatusCode, check.Equals, http.StatusOK)
+               text, err := ioutil.ReadAll(resp.Body)
+               // Close explicitly rather than defer: this runs once
+               // per polling iteration.
+               resp.Body.Close()
+               c.Check(err, check.IsNil)
+               if strings.Contains(string(text), "_db_max_connections 0\n") {
+                       // wait for the first db stats update
+                       if time.Now().After(deadline) {
+                               c.Fatal("timed out")
+                       }
+                       time.Sleep(time.Second / 50)
+                       continue
+               }
+               c.Check(string(text), check.Matches, `(?ms).*\narvados_ws_db_max_connections 12\n.*`)
+               c.Check(string(text), check.Matches, `(?ms).*\narvados_ws_db_open_connections\{inuse="0"\} \d+\n.*`)
+               c.Check(string(text), check.Matches, `(?ms).*\narvados_ws_db_open_connections\{inuse="1"\} \d+\n.*`)
+               break
+       }
 }
 
-func (s *serverSuite) TestHealthDisabled(c *check.C) {
+func (s *serviceSuite) TestHealthDisabled(c *check.C) {
        s.cluster.ManagementToken = ""
-
-       go s.srv.Run()
-       defer s.srv.Close()
-       s.srv.WaitReady()
-
+       s.start(c)
        for _, token := range []string{"", "foo", arvadostest.ManagementToken} {
-               req, err := http.NewRequest("GET", "http://"+s.srv.listener.Addr().String()+"/_health/ping", nil)
+               req, err := http.NewRequest("GET", s.srv.URL+"/_health/ping", nil)
                c.Assert(err, check.IsNil)
                req.Header.Add("Authorization", "Bearer "+token)
                resp, err := http.DefaultClient.Do(req)
@@ -139,7 +152,7 @@ func (s *serverSuite) TestHealthDisabled(c *check.C) {
        }
 }
 
-func (s *serverSuite) TestLoadLegacyConfig(c *check.C) {
+func (s *serviceSuite) TestLoadLegacyConfig(c *check.C) {
        content := []byte(`
 Client:
   APIHost: example.com
@@ -175,7 +188,14 @@ ManagementToken: qqqqq
                c.Error(err)
 
        }
-       cluster := configure(logger(nil), []string{"arvados-ws", "-config", tmpfile.Name()})
+       ldr := config.NewLoader(&bytes.Buffer{}, logrus.New())
+       flagset := flag.NewFlagSet("", flag.ContinueOnError)
+       ldr.SetupFlags(flagset)
+       flagset.Parse(ldr.MungeLegacyConfigArgs(ctxlog.TestLogger(c), []string{"-config", tmpfile.Name()}, "-legacy-ws-config"))
+       cfg, err := ldr.Load()
+       c.Check(err, check.IsNil)
+       cluster, err := cfg.GetCluster("")
+       c.Check(err, check.IsNil)
        c.Check(cluster, check.NotNil)
 
        c.Check(cluster.Services.Controller.ExternalURL, check.Equals, arvados.URL{Scheme: "https", Host: "example.com"})
index 53b02146d560fe3eb4d045227277d60a8c6e072b..c0cfbd6d02f6ff37083f426c85084effae45f212 100644 (file)
@@ -2,7 +2,7 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package main
+package ws
 
 import (
        "database/sql"
index b0f40371ffeb0ba12c5d3d1e1326d320fb6dbb51..309352b39edbd329aa031ec0c6194791341acec9 100644 (file)
@@ -2,7 +2,7 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package main
+package ws
 
 import (
        "database/sql"
@@ -14,6 +14,7 @@ import (
        "time"
 
        "git.arvados.org/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/sdk/go/ctxlog"
        "github.com/sirupsen/logrus"
 )
 
@@ -59,7 +60,7 @@ func newSessionV0(ws wsConn, sendq chan<- interface{}, db *sql.DB, pc permChecke
                db:          db,
                ac:          ac,
                permChecker: pc,
-               log:         logger(ws.Request().Context()),
+               log:         ctxlog.FromContext(ws.Request().Context()),
        }
 
        err := ws.Request().ParseForm()
@@ -128,7 +129,7 @@ func (sess *v0session) EventMessage(e *event) ([]byte, error) {
        } else {
                permTarget = detail.ObjectUUID
        }
-       ok, err := sess.permChecker.Check(permTarget)
+       ok, err := sess.permChecker.Check(sess.ws.Request().Context(), permTarget)
        if err != nil || !ok {
                return nil, err
        }
index bd70b44459dd79b5f22b0c08074b2d4bf480d76f..7986cc7b08f95598ae4756be0aa1ca3dea2e2f7b 100644 (file)
@@ -2,7 +2,7 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package main
+package ws
 
 import (
        "bytes"
@@ -11,6 +11,7 @@ import (
        "io"
        "net/url"
        "os"
+       "strings"
        "sync"
        "time"
 
@@ -30,17 +31,16 @@ func init() {
 var _ = check.Suite(&v0Suite{})
 
+// v0Suite wraps a serviceSuite, whose test server is started in
+// SetUpTest and dialed by testClient().
 type v0Suite struct {
-       serverSuite serverSuite
-       token       string
-       toDelete    []string
-       wg          sync.WaitGroup
-       ignoreLogID uint64
+       serviceSuite serviceSuite
+       token        string
+       toDelete     []string
+       wg           sync.WaitGroup
+       ignoreLogID  uint64
 }
 
 func (s *v0Suite) SetUpTest(c *check.C) {
-       s.serverSuite.SetUpTest(c)
-       go s.serverSuite.srv.Run()
-       s.serverSuite.srv.WaitReady()
+       s.serviceSuite.SetUpTest(c)
+       s.serviceSuite.start(c)
 
        s.token = arvadostest.ActiveToken
        s.ignoreLogID = s.lastLogID(c)
@@ -48,7 +48,7 @@ func (s *v0Suite) SetUpTest(c *check.C) {
 
+// TearDownTest waits for s.wg, then tears down the wrapped
+// serviceSuite (closing its test server).
 func (s *v0Suite) TearDownTest(c *check.C) {
        s.wg.Wait()
-       s.serverSuite.srv.Close()
+       s.serviceSuite.TearDownTest(c)
 }
 
 func (s *v0Suite) TearDownSuite(c *check.C) {
@@ -353,8 +353,8 @@ func (s *v0Suite) expectLog(c *check.C, r *json.Decoder) *arvados.Log {
 }
 
 func (s *v0Suite) testClient() (*websocket.Conn, *json.Decoder, *json.Encoder) {
-       srv := s.serverSuite.srv
-       conn, err := websocket.Dial("ws://"+srv.listener.Addr().String()+"/websocket?api_token="+s.token, "", "http://"+srv.listener.Addr().String())
+       srv := s.serviceSuite.srv
+       conn, err := websocket.Dial(strings.Replace(srv.URL, "http", "ws", 1)+"/websocket?api_token="+s.token, "", srv.URL)
        if err != nil {
                panic(err)
        }
index 58f77df430201f79e71f66209711a740dff8a016..60b980d58e2f8f8a9acc67362deb7d7beff21350 100644 (file)
@@ -2,7 +2,7 @@
 //
 // SPDX-License-Identifier: AGPL-3.0
 
-package main
+package ws
 
 import (
        "database/sql"
index d2585a6666c270de61514d007257d648eecd2287..efa2e08a7a7f34c3a04ee4c213931ed37a4f65ab 100755 (executable)
@@ -7,34 +7,14 @@ exec 2>&1
 set -ex -o pipefail
 
 . /usr/local/lib/arvbox/common.sh
-
-if test -s /var/lib/arvados/api_rails_env ; then
-  RAILS_ENV=$(cat /var/lib/arvados/api_rails_env)
-else
-  RAILS_ENV=development
-fi
-
 . /usr/local/lib/arvbox/go-setup.sh
 
-flock /var/lib/gopath/gopath.lock go install "git.arvados.org/arvados.git/services/ws"
-install $GOPATH/bin/ws /usr/local/bin/arvados-ws
+(cd /usr/local/bin && ln -sf arvados-server arvados-ws)
 
 if test "$1" = "--only-deps" ; then
     exit
 fi
 
-database_pw=$(cat /var/lib/arvados/api_database_pw)
-
-cat >/var/lib/arvados/arvados-ws.yml <<EOF
-Client:
-  APIHost: $localip:${services[controller-ssl]}
-  Insecure: false
-Postgres:
-  dbname: arvados_$RAILS_ENV
-  user: arvados
-  password: $database_pw
-  host: localhost
-Listen: localhost:${services[websockets]}
-EOF
+/usr/local/lib/arvbox/runsu.sh flock /var/lib/arvados/cluster_config.yml.lock /usr/local/lib/arvbox/cluster-config.sh
 
-exec /usr/local/bin/arvados-ws -config /var/lib/arvados/arvados-ws.yml
+exec /usr/local/lib/arvbox/runsu.sh /usr/local/bin/arvados-ws