From: Tom Clegg Date: Mon, 25 Jul 2022 14:56:27 +0000 (-0400) Subject: 19166: Merge branch 'main' X-Git-Tag: 2.5.0~106^2 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/2e03d03bc55b5a612c2bf04d878a72f2ee420d99?hp=-c 19166: Merge branch 'main' Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- 2e03d03bc55b5a612c2bf04d878a72f2ee420d99 diff --combined cmd/arvados-server/cmd.go index b3feca4370,d9c41ca587..438ca206da --- a/cmd/arvados-server/cmd.go +++ b/cmd/arvados-server/cmd.go @@@ -11,6 -11,9 +11,9 @@@ import "io" "net/http" "os" + "path" + "path/filepath" + "strings" "git.arvados.org/arvados.git/lib/boot" "git.arvados.org/arvados.git/lib/cloud/cloudtest" @@@ -25,7 -28,6 +28,7 @@@ "git.arvados.org/arvados.git/lib/service" "git.arvados.org/arvados.git/sdk/go/arvados" "git.arvados.org/arvados.git/sdk/go/health" + dispatchslurm "git.arvados.org/arvados.git/services/crunch-dispatch-slurm" "git.arvados.org/arvados.git/services/githttpd" keepbalance "git.arvados.org/arvados.git/services/keep-balance" keepweb "git.arvados.org/arvados.git/services/keep-web" @@@ -51,7 -53,6 +54,7 @@@ var "crunch-run": crunchrun.Command, "dispatch-cloud": dispatchcloud.Command, "dispatch-lsf": lsf.DispatchCommand, + "dispatch-slurm": dispatchslurm.Command, "git-httpd": githttpd.Command, "health": healthCommand, "install": install.Command, @@@ -82,8 -83,21 +85,21 @@@ func (wb2command) RunCommand(prog strin fmt.Fprintf(stderr, "json.Marshal: %s\n", err) return 1 } + servefs := http.FileServer(http.Dir(args[2])) mux := http.NewServeMux() - mux.Handle("/", http.FileServer(http.Dir(args[2]))) + mux.Handle("/", http.HandlerFunc(func(w http.ResponseWriter, req *http.Request) { + for _, ent := range strings.Split(req.URL.Path, "/") { + if ent == ".." { + http.Error(w, "invalid URL path", http.StatusBadRequest) + return + } + } + fnm := filepath.Join(args[2], filepath.FromSlash(path.Clean("/"+req.URL.Path))) + if _, err := os.Stat(fnm); os.IsNotExist(err) { + req.URL.Path = "/" + } + servefs.ServeHTTP(w, req) + })) mux.HandleFunc("/config.json", func(w http.ResponseWriter, _ *http.Request) { w.Write(configJSON) }) diff --combined doc/_config.yml index 2f31336185,d2bb7e7975..148e1a166e --- a/doc/_config.yml +++ b/doc/_config.yml @@@ -161,7 -161,6 +161,7 @@@ navbar - Computation with Crunch: - api/execution.html.textile.liquid - architecture/dispatchcloud.html.textile.liquid + - architecture/hpc.html.textile.liquid - architecture/singularity.html.textile.liquid - Other: - api/permission-model.html.textile.liquid @@@ -178,7 -177,7 +178,7 @@@ - admin/federation.html.textile.liquid - admin/merge-remote-account.html.textile.liquid - admin/migrating-providers.html.textile.liquid - - user/topics/arvados-sync-groups.html.textile.liquid + - user/topics/arvados-sync-external-sources.html.textile.liquid - admin/scoped-tokens.html.textile.liquid - admin/token-expiration-policy.html.textile.liquid - admin/user-activity.html.textile.liquid diff --combined doc/admin/config-urls.html.textile.liquid index c9c964bc55,01c30f0e0e..500e0d8c8c --- a/doc/admin/config-urls.html.textile.liquid +++ b/doc/admin/config-urls.html.textile.liquid @@@ -16,9 -16,9 +16,9 @@@ The @Services@ section lists a number o The @ExternalURL@ is the address where the service should be reachable by clients, both from inside and from outside the Arvados cluster. Some services do not expose an Arvados API, only Prometheus metrics. In that case, @ExternalURL@ is not used. - The keys under @InternalURLs@ are addresses that are used by the reverse proxy (e.g. Nginx) that fronts Arvados services. The exception is the @Keepstore@ service, where clients connect directly to the addresses listed under @InternalURLs@. If a service is not fronted by a reverse proxy, e.g. when its endpoint only exposes Prometheus metrics, the intention is that metrics are collected directly from the endpoints defined in @InternalURLs@. + The keys under @InternalURLs@ are the URLs through which Arvados system components can connect to one another, including the reverse proxy (e.g. Nginx) that fronts Arvados services. The exception is the @Keepstore@ service, where clients on the local network connect directly to @Keepstore.InternalURLs@ (while clients from outside networks connect to @Keepproxy.ExternalURL@). If a service is not fronted by a reverse proxy, e.g. when its endpoint only exposes Prometheus metrics, the intention is that metrics are collected directly from the endpoints defined in @InternalURLs@. - @InternalURLs@ are also used by the service itself to figure out which address/port to listen on. + Each entry in the @InternalURLs@ section may also indicate a @ListenURL@ to determine the protocol, address/interface, and port where the service process will listen, in case the desired listening address differs from the @InternalURLs@ key itself -- for example, when passing internal traffic through a reverse proxy. If the Arvados service lives behind a reverse proxy (e.g. Nginx), configuring the reverse proxy and the @InternalURLs@ and @ExternalURL@ values must be done in concert. @@@ -28,7 -28,7 +28,7 @@@ h2. Overvie table(table table-bordered table-condensed). |_.Service |_.ExternalURL required? |_.InternalURLs required?|_.InternalURLs must be reachable from other cluster nodes?|_.Note| |railsapi |no |yes|no ^1^|InternalURLs only used by Controller| -|controller |yes |yes|no ^2^|InternalURLs only used by reverse proxy (e.g. Nginx)| +|controller |yes |yes|yes ^2,4^|InternalURLs used by reverse proxy and container shell connections| |arvados-dispatch-cloud|no |yes|no ^3^|InternalURLs only used to expose Prometheus metrics| |arvados-dispatch-lsf|no |yes|no ^3^|InternalURLs only used to expose Prometheus metrics| |git-http |yes |yes|no ^2^|InternalURLs only used by reverse proxy (e.g. Nginx)| @@@ -45,7 -45,6 +45,7 @@@ ^1^ If @Controller@ runs on a different host than @RailsAPI@, the @InternalURLs@ will need to be reachable from the host that runs @Controller@. ^2^ If the reverse proxy (e.g. Nginx) does not run on the same host as the Arvados service it fronts, the @InternalURLs@ will need to be reachable from the host that runs the reverse proxy. ^3^ If the Prometheus metrics are not collected from the same machine that runs the service, the @InternalURLs@ will need to be reachable from the host that collects the metrics. +^4^ If dispatching containers to HPC (Slurm/LSF) and there are multiple @Controller@ services, they must be able to connect to one another using their InternalURLs, otherwise the "tunnel connections":{{site.baseurl}}/architecture/hpc.html enabling "container shell access":{{site.baseurl}}/install/container-shell-access.html will not work. When @InternalURLs@ do not need to be reachable from other nodes, it is most secure to use loopback addresses as @InternalURLs@, e.g. @http://127.0.0.1:9005@. @@@ -229,11 -228,12 +229,12 @@@ Consider this section for the @Controll {% codeblock as yaml %} Controller: InternalURLs: - "http://localhost:8003": {} + "https://ctrl-0.internal": + ListenURL: "http://localhost:8003" ExternalURL: "https://ClusterID.example.com" {% endcodeblock %} - The @ExternalURL@ advertised is @https://ClusterID.example.com@. The @Controller@ service will start up on @localhost@ port 8003. Nginx is configured to sit in front of the @Controller@ service and terminates SSL: + The @ExternalURL@ advertised to clients is @https://ClusterID.example.com@. The @arvados-controller@ process will listen on @localhost@ port 8003. Other Arvados service processes in the cluster can connect to this specific controller instance, using the URL @https://ctrl-0.internal@. Nginx is configured to sit in front of the @Controller@ service and terminate TLS:

  # This is the port where nginx expects to contact arvados-controller.
@@@ -246,7 -246,7 +247,7 @@@ server 
    # the request is reverse proxied to the upstream 'controller'
  
    listen       443 ssl;
-   server_name  ClusterID.example.com;
+   server_name  ClusterID.example.com ctrl-0.internal;
  
    ssl_certificate     /YOUR/PATH/TO/cert.pem;
    ssl_certificate_key /YOUR/PATH/TO/cert.key;
@@@ -276,4 -276,13 +277,13 @@@
  }
  
+ If the host part of @ListenURL@ is ambiguous, in the sense that more than one system host is able to listen on that address (e.g., @localhost@), configure each host's startup scripts to set the environment variable @ARVADOS_SERVICE_INTERNAL_URL@ to the @InternalURLs@ key that will reach that host. In the example above, this would be @ARVADOS_SERVICE_INTERNAL_URL=https://ctrl-0.internal@. + + If the cluster has just a single node running all of the Arvados server processes, configuration can be simplified: + {% codeblock as yaml %} + Controller: + InternalURLs: + "http://localhost:8003": {} + ExternalURL: "https://ClusterID.example.com" + {% endcodeblock %} diff --combined lib/crunchrun/crunchrun.go index 0bf19083a2,68181395fa..eadf22876f --- a/lib/crunchrun/crunchrun.go +++ b/lib/crunchrun/crunchrun.go @@@ -999,6 -999,7 +999,7 @@@ func (runner *ContainerRunner) CreateCo env["ARVADOS_API_TOKEN"] = tok env["ARVADOS_API_HOST"] = os.Getenv("ARVADOS_API_HOST") env["ARVADOS_API_HOST_INSECURE"] = os.Getenv("ARVADOS_API_HOST_INSECURE") + env["ARVADOS_KEEP_SERVICES"] = os.Getenv("ARVADOS_KEEP_SERVICES") } workdir := runner.Container.Cwd if workdir == "." { @@@ -1743,7 -1744,6 +1744,7 @@@ func (command) RunCommand(prog string, runtimeEngine := flags.String("runtime-engine", "docker", "container runtime: docker or singularity") brokenNodeHook := flags.String("broken-node-hook", "", "script to run if node is detected to be broken (for example, Docker daemon is not running)") flags.Duration("check-containerd", 0, "Ignored. Exists for compatibility with older versions.") + version := flags.Bool("version", false, "Write version information to stdout and exit 0.") ignoreDetachFlag := false if len(args) > 0 && args[0] == "-no-detach" { @@@ -1759,9 -1759,6 +1760,9 @@@ if ok, code := cmd.ParseFlags(flags, prog, args, "container-uuid", stderr); !ok { return code + } else if *version { + fmt.Fprintln(stdout, prog, cmd.Version.String()) + return 0 } else if !*list && flags.NArg() != 1 { fmt.Fprintf(stderr, "missing required argument: container-uuid (try -help)\n") return 2 @@@ -1909,8 -1906,11 +1910,8 @@@ // not safe to run a gateway service without an auth // secret cr.CrunchLog.Printf("Not starting a gateway server (GatewayAuthSecret was not provided by dispatcher)") - } else if gwListen := os.Getenv("GatewayAddress"); gwListen == "" { - // dispatcher did not tell us which external IP - // address to advertise --> no gateway service - cr.CrunchLog.Printf("Not starting a gateway server (GatewayAddress was not provided by dispatcher)") } else { + gwListen := os.Getenv("GatewayAddress") cr.gateway = Gateway{ Address: gwListen, AuthSecret: gwAuthSecret, @@@ -1918,18 -1918,6 +1919,18 @@@ Target: cr.executor, Log: cr.CrunchLog, } + if gwListen == "" { + // Direct connection won't work, so we use the + // gateway_address field to indicate the + // internalURL of the controller process that + // has the current tunnel connection. + cr.gateway.ArvadosClient = cr.dispatcherClient + cr.gateway.UpdateTunnelURL = func(url string) { + cr.gateway.Address = "tunnel " + url + cr.DispatcherArvClient.Update("containers", containerUUID, + arvadosclient.Dict{"container": arvadosclient.Dict{"gateway_address": cr.gateway.Address}}, nil) + } + } err = cr.gateway.Start() if err != nil { log.Printf("error starting gateway server: %s", err) @@@ -2058,7 -2046,8 +2059,8 @@@ func startLocalKeepstore(configData Con // modify the cluster configuration that we feed it on stdin. configData.Cluster.API.MaxKeepBlobBuffers = configData.KeepBuffers - ln, err := net.Listen("tcp", "localhost:0") + localaddr := localKeepstoreAddr() + ln, err := net.Listen("tcp", net.JoinHostPort(localaddr, "0")) if err != nil { return nil, err } @@@ -2068,7 -2057,7 +2070,7 @@@ return nil, err } ln.Close() - url := "http://localhost:" + port + url := "http://" + net.JoinHostPort(localaddr, port) fmt.Fprintf(logbuf, "starting keepstore on %s\n", url) @@@ -2160,3 -2149,43 +2162,43 @@@ func currentUserAndGroups() string } return s } + + // Return a suitable local interface address for a local keepstore + // service. Currently this is the numerically lowest non-loopback ipv4 + // address assigned to a local interface that is not in any of the + // link-local/vpn/loopback ranges 169.254/16, 100.64/10, or 127/8. + func localKeepstoreAddr() string { + var ips []net.IP + // Ignore error (proceed with zero IPs) + addrs, _ := processIPs(os.Getpid()) + for addr := range addrs { + ip := net.ParseIP(addr) + if ip == nil { + // invalid + continue + } + if ip.Mask(net.CIDRMask(8, 32)).Equal(net.IPv4(127, 0, 0, 0)) || + ip.Mask(net.CIDRMask(10, 32)).Equal(net.IPv4(100, 64, 0, 0)) || + ip.Mask(net.CIDRMask(16, 32)).Equal(net.IPv4(169, 254, 0, 0)) { + // unsuitable + continue + } + ips = append(ips, ip) + } + if len(ips) == 0 { + return "0.0.0.0" + } + sort.Slice(ips, func(ii, jj int) bool { + i, j := ips[ii], ips[jj] + if len(i) != len(j) { + return len(i) < len(j) + } + for x := range i { + if i[x] != j[x] { + return i[x] < j[x] + } + } + return false + }) + return ips[0].String() + } diff --combined lib/crunchrun/integration_test.go index a18829f30d,9860c79497..d569020824 --- a/lib/crunchrun/integration_test.go +++ b/lib/crunchrun/integration_test.go @@@ -223,26 -223,19 +223,31 @@@ func (s *integrationSuite) TestRunTrivi c.Check(log, trial.matchGetReq, `(?ms).*"reqMethod":"GET".*`) c.Check(log, trial.matchPutReq, `(?ms).*"reqMethod":"PUT".*,"reqPath":"0e3bcff26d51c895a60ea0d4585e134d".*`) } + + c.Check(s.logFiles["crunch-run.txt"], Matches, `(?ms).*using local keepstore process .* at http://[\d\.]{7,}:\d+.*`) + c.Check(s.logFiles["crunch-run.txt"], Not(Matches), `(?ms).* at http://127\..*`) + c.Check(s.logFiles["crunch-run.txt"], Not(Matches), `(?ms).* at http://169\.254\..*`) + c.Check(s.logFiles["stderr.txt"], Matches, `(?ms).*ARVADOS_KEEP_SERVICES=http://[\d\.]{7,}:\d+\n.*`) } +} +func (s *integrationSuite) TestRunTrivialContainerWithNoLocalKeepstore(c *C) { // Check that (1) config is loaded from $ARVADOS_CONFIG when // not provided on stdin and (2) if a local keepstore is not // started, crunch-run.txt explains why not. s.SetUpTest(c) s.stdin.Reset() s.testRunTrivialContainer(c) + c.Check(s.logFiles["crunch-run.txt"], Matches, `(?ms).*not starting a local keepstore process because KeepBuffers=0 in config\n.*`) + + s.SetUpTest(c) + s.args = []string{"-config", c.MkDir() + "/config.yaml"} + s.stdin.Reset() + buf, err := ioutil.ReadFile(os.Getenv("ARVADOS_CONFIG")) + c.Assert(err, IsNil) + err = ioutil.WriteFile(s.args[1], bytes.Replace(buf, []byte("LocalKeepBlobBuffersPerVCPU: 0"), []byte("LocalKeepBlobBuffersPerVCPU: 1"), -1), 0666) + c.Assert(err, IsNil) + s.testRunTrivialContainer(c) c.Check(s.logFiles["crunch-run.txt"], Matches, `(?ms).*not starting a local keepstore process because a volume \(zzzzz-nyw5e-00000000000000\d\) uses AccessViaHosts\n.*`) // Check that config read errors are logged @@@ -255,7 -248,7 +260,7 @@@ s.SetUpTest(c) s.args = []string{"-config", c.MkDir() + "/config-unreadable.yaml"} s.stdin.Reset() - err := ioutil.WriteFile(s.args[1], []byte{}, 0) + err = ioutil.WriteFile(s.args[1], []byte{}, 0) c.Check(err, IsNil) s.testRunTrivialContainer(c) c.Check(s.logFiles["crunch-run.txt"], Matches, `(?ms).*could not load config file \Q`+s.args[1]+`\E:.* permission denied\n.*`) @@@ -270,7 -263,7 +275,7 @@@ func (s *integrationSuite) testRunTrivi if err := exec.Command("which", s.engine).Run(); err != nil { c.Skip(fmt.Sprintf("%s: %s", s.engine, err)) } - s.cr.Command = []string{"sh", "-c", "cat /mnt/in/inputfile >/mnt/out/inputfile && cat /mnt/json >/mnt/out/json && ! touch /mnt/in/shouldbereadonly && mkdir /mnt/out/emptydir"} + s.cr.Command = []string{"sh", "-c", "env >&2 && cat /mnt/in/inputfile >/mnt/out/inputfile && cat /mnt/json >/mnt/out/json && ! touch /mnt/in/shouldbereadonly && mkdir /mnt/out/emptydir"} s.setup(c) args := []string{ diff --combined sdk/python/tests/run_test_server.py index 2b6684ff57,28cb0953f3..e32d385f73 --- a/sdk/python/tests/run_test_server.py +++ b/sdk/python/tests/run_test_server.py @@@ -635,6 -635,7 +635,7 @@@ def run_nginx() return stop_nginx() nginxconf = {} + nginxconf['UPSTREAMHOST'] = 'localhost' nginxconf['LISTENHOST'] = 'localhost' nginxconf['CONTROLLERPORT'] = internal_port_from_config("Controller") nginxconf['ARVADOS_API_HOST'] = "0.0.0.0:" + str(external_port_from_config("Controller")) @@@ -830,7 -831,6 +831,7 @@@ def setup_config() "JobsAPI": { "GitInternalDir": os.path.join(SERVICES_SRC_DIR, 'api', 'tmp', 'internal.git'), }, + "LocalKeepBlobBuffersPerVCPU": 0, "SupportedDockerImageFormats": {"v1": {}}, "ShellAccess": { "Admin": True,