From: Tom Clegg Date: Sat, 21 May 2022 01:28:53 +0000 (-0400) Subject: 15370: Merge branch 'main' into 15370-loopback-dispatchcloud X-Git-Tag: 2.5.0~142^2~8 X-Git-Url: https://git.arvados.org/arvados.git/commitdiff_plain/3fa6aa4043286ad61e5f29c136d3cc2942e8750d?hp=-c 15370: Merge branch 'main' into 15370-loopback-dispatchcloud Arvados-DCO-1.1-Signed-off-by: Tom Clegg --- 3fa6aa4043286ad61e5f29c136d3cc2942e8750d diff --combined build/run-tests.sh index 9f5d37322e,4fbb4e6f04..cc8e070a5c --- a/build/run-tests.sh +++ b/build/run-tests.sh @@@ -269,7 -269,13 +269,13 @@@ sanity_checks() echo -n 'graphviz: ' dot -V || fatal "No graphviz. Try: apt-get install graphviz" echo -n 'geckodriver: ' - geckodriver --version | grep ^geckodriver || echo "No geckodriver. Try: wget -O- https://github.com/mozilla/geckodriver/releases/download/v0.23.0/geckodriver-v0.23.0-linux64.tar.gz | sudo tar -C /usr/local/bin -xzf - geckodriver" + geckodriver --version | grep ^geckodriver || echo "No geckodriver. Try: arvados-server install" + echo -n 'singularity: ' + singularity --version || fatal "No singularity. Try: arvados-server install" + echo -n 'docker client: ' + docker --version || echo "No docker client. Try: arvados-server install" + echo -n 'docker server: ' + docker info --format='{{.ServerVersion}}' || echo "No docker server. Try: arvados-server install" if [[ "$NEED_SDK_R" = true ]]; then # R SDK stuff @@@ -736,7 -742,7 +742,7 @@@ do_test() go_ldflags() { version=${ARVADOS_VERSION:-$(git log -n1 --format=%H)-dev} - echo "-X git.arvados.org/arvados.git/lib/cmd.version=${version} -X main.version=${version}" + echo "-X git.arvados.org/arvados.git/lib/cmd.version=${version} -X main.version=${version} -s -w" } do_test_once() { diff --combined lib/config/config.default.yml index d965633055,958171d09f..f6e9910161 --- a/lib/config/config.default.yml +++ b/lib/config/config.default.yml @@@ -248,8 -248,9 +248,9 @@@ Clusters FreezeProjectRequiresDescription: false # Project properties that must have non-empty values in order to - # freeze a project. Example: {"property_name": true} - FreezeProjectRequiresProperties: {} + # freeze a project. Example: "property_name": {} + FreezeProjectRequiresProperties: + SAMPLE: {} # If true, only an admin user can un-freeze a project. If false, # any user with "manage" permission can un-freeze. @@@ -1281,9 -1282,7 +1282,9 @@@ # need to be detected and cleaned up manually. TagKeyPrefix: Arvados - # Cloud driver: "azure" (Microsoft Azure) or "ec2" (Amazon AWS). + # Cloud driver: "azure" (Microsoft Azure), "ec2" (Amazon AWS), + # or "loopback" (run containers on dispatch host for testing + # purposes). Driver: ec2 # Cloud-specific driver parameters. diff --combined lib/config/load.go index abb3a804ba,dba7997870..acc54cf92d --- a/lib/config/load.go +++ b/lib/config/load.go @@@ -16,7 -16,6 +16,7 @@@ import "io/ioutil" "os" "regexp" + "runtime" "strconv" "strings" "time" @@@ -26,7 -25,6 +26,7 @@@ "github.com/imdario/mergo" "github.com/prometheus/client_golang/prometheus" "github.com/sirupsen/logrus" + "golang.org/x/sys/unix" ) //go:embed config.default.yml @@@ -299,10 -297,7 +299,10 @@@ func (ldr *Loader) Load() (*arvados.Con ldr.loadOldKeepBalanceConfig, ) } - loadFuncs = append(loadFuncs, ldr.setImplicitStorageClasses) + loadFuncs = append(loadFuncs, + ldr.setImplicitStorageClasses, + ldr.setLoopbackInstanceType, + ) for _, f := range loadFuncs { err = f(&cfg) if err != nil { @@@ -345,6 -340,7 +345,7 @@@ ldr.checkEnum("Containers.LocalKeepLogsToContainerLog", cc.Containers.LocalKeepLogsToContainerLog, "none", "all", "errors"), ldr.checkEmptyKeepstores(cc), ldr.checkUnlistedKeepstores(cc), + ldr.checkLocalKeepBlobBuffers(cc), ldr.checkStorageClasses(cc), ldr.checkCUDAVersions(cc), // TODO: check non-empty Rendezvous on @@@ -419,67 -415,6 +420,67 @@@ func (ldr *Loader) checkEnum(label, val return fmt.Errorf("%s: unacceptable value %q: must be one of %q", label, value, accepted) } +func (ldr *Loader) setLoopbackInstanceType(cfg *arvados.Config) error { + for id, cc := range cfg.Clusters { + if !cc.Containers.CloudVMs.Enable || cc.Containers.CloudVMs.Driver != "loopback" { + continue + } + if len(cc.InstanceTypes) == 1 { + continue + } + if len(cc.InstanceTypes) > 1 { + return fmt.Errorf("Clusters.%s.InstanceTypes: cannot use multiple InstanceTypes with loopback driver", id) + } + // No InstanceTypes configured. Fill in implicit + // default. + hostram, err := getHostRAM() + if err != nil { + return err + } + scratch, err := getFilesystemSize(os.TempDir()) + if err != nil { + return err + } + cc.InstanceTypes = arvados.InstanceTypeMap{"localhost": { + Name: "localhost", + ProviderType: "localhost", + VCPUs: runtime.NumCPU(), + RAM: hostram, + Scratch: scratch, + IncludedScratch: scratch, + }} + cfg.Clusters[id] = cc + } + return nil +} + +func getFilesystemSize(path string) (arvados.ByteSize, error) { + var stat unix.Statfs_t + err := unix.Statfs(path, &stat) + if err != nil { + return 0, err + } + return arvados.ByteSize(stat.Blocks * uint64(stat.Bsize)), nil +} + +var reMemTotal = regexp.MustCompile(`(^|\n)MemTotal: *(\d+) kB\n`) + +func getHostRAM() (arvados.ByteSize, error) { + buf, err := os.ReadFile("/proc/meminfo") + if err != nil { + return 0, err + } + m := reMemTotal.FindSubmatch(buf) + if m == nil { + return 0, errors.New("error parsing /proc/meminfo: no MemTotal") + } + kb, err := strconv.ParseInt(string(m[2]), 10, 64) + if err != nil { + return 0, fmt.Errorf("error parsing /proc/meminfo: %q: %w", m[2], err) + } + return arvados.ByteSize(kb) * 1024, nil +} + func (ldr *Loader) setImplicitStorageClasses(cfg *arvados.Config) error { cluster: for id, cc := range cfg.Clusters { @@@ -501,6 -436,24 +502,24 @@@ cfg.Clusters[id] = cc } return nil + } + + func (ldr *Loader) checkLocalKeepBlobBuffers(cc arvados.Cluster) error { + kbb := cc.Containers.LocalKeepBlobBuffersPerVCPU + if kbb == 0 { + return nil + } + for uuid, vol := range cc.Volumes { + if len(vol.AccessViaHosts) > 0 { + ldr.Logger.Warnf("LocalKeepBlobBuffersPerVCPU is %d but will not be used because at least one volume (%s) uses AccessViaHosts -- suggest changing to 0", kbb, uuid) + return nil + } + if !vol.ReadOnly && vol.Replication < cc.Collections.DefaultReplication { + ldr.Logger.Warnf("LocalKeepBlobBuffersPerVCPU is %d but will not be used because at least one volume (%s) has lower replication than DefaultReplication (%d < %d) -- suggest changing to 0", kbb, uuid, vol.Replication, cc.Collections.DefaultReplication) + return nil + } + } + return nil } func (ldr *Loader) checkStorageClasses(cc arvados.Cluster) error { diff --combined lib/config/load_test.go index 256e8a3e8c,feb05cb951..fb9792632c --- a/lib/config/load_test.go +++ b/lib/config/load_test.go @@@ -13,7 -13,6 +13,7 @@@ import "os/exec" "reflect" "regexp" + "runtime" "strings" "testing" "time" @@@ -238,7 -237,7 +238,7 @@@ Clusters InternalURLs: "http://host.example:12345": {} Volumes: - zzzzz-nyw5e-aaaaaaaaaaaaaaa: {} + zzzzz-nyw5e-aaaaaaaaaaaaaaa: {Replication: 2} `, &logbuf).Load() c.Assert(err, check.IsNil) c.Log(logbuf.String()) @@@ -602,55 -601,31 +602,80 @@@ func (s *LoadSuite) TestListKeys(c *che } } +func (s *LoadSuite) TestLoopbackInstanceTypes(c *check.C) { + ldr := testLoader(c, ` +Clusters: + z1111: + Containers: + CloudVMs: + Enable: true + Driver: loopback + InstanceTypes: + a: {} + b: {} +`, nil) + cfg, err := ldr.Load() + c.Check(err, check.ErrorMatches, `Clusters\.z1111\.InstanceTypes: cannot use multiple InstanceTypes with loopback driver`) + + ldr = testLoader(c, ` +Clusters: + z1111: + Containers: + CloudVMs: + Enable: true + Driver: loopback +`, nil) + cfg, err = ldr.Load() + c.Assert(err, check.IsNil) + cc, err := cfg.GetCluster("") + c.Assert(err, check.IsNil) + c.Check(cc.InstanceTypes, check.HasLen, 1) + c.Check(cc.InstanceTypes["localhost"].VCPUs, check.Equals, runtime.NumCPU()) + + ldr = testLoader(c, ` +Clusters: + z1111: + Containers: + CloudVMs: + Enable: true + Driver: loopback + InstanceTypes: + a: + VCPUs: 9 +`, nil) + cfg, err = ldr.Load() + c.Assert(err, check.IsNil) + cc, err = cfg.GetCluster("") + c.Assert(err, check.IsNil) + c.Check(cc.InstanceTypes, check.HasLen, 1) + c.Check(cc.InstanceTypes["a"].VCPUs, check.Equals, 9) +} + + func (s *LoadSuite) TestWarnUnusedLocalKeep(c *check.C) { + var logbuf bytes.Buffer + _, err := testLoader(c, ` + Clusters: + z1111: + Volumes: + z: + Replication: 1 + `, &logbuf).Load() + c.Assert(err, check.IsNil) + c.Check(logbuf.String(), check.Matches, `(?ms).*LocalKeepBlobBuffersPerVCPU is 1 but will not be used because at least one volume \(z\) has lower replication than DefaultReplication \(1 < 2\) -- suggest changing to 0.*`) + + logbuf.Reset() + _, err = testLoader(c, ` + Clusters: + z1111: + Volumes: + z: + AccessViaHosts: + "http://0.0.0.0:12345": {} + `, &logbuf).Load() + c.Assert(err, check.IsNil) + c.Check(logbuf.String(), check.Matches, `(?ms).*LocalKeepBlobBuffersPerVCPU is 1 but will not be used because at least one volume \(z\) uses AccessViaHosts -- suggest changing to 0.*`) + } + func (s *LoadSuite) TestImplicitStorageClasses(c *check.C) { // If StorageClasses and Volumes.*.StorageClasses are all // empty, there is a default storage class named "default". @@@ -874,16 -849,3 +899,16 @@@ arvados_config_source_timestamp_seconds `) } } + +func (s *LoadSuite) TestGetHostRAM(c *check.C) { + hostram, err := getHostRAM() + c.Check(err, check.IsNil) + c.Logf("getHostRAM() == %v", hostram) +} + +func (s *LoadSuite) TestGetFilesystemSize(c *check.C) { + path := c.MkDir() + size, err := getFilesystemSize(path) + c.Check(err, check.IsNil) + c.Logf("getFilesystemSize(%q) == %v", path, size) +} diff --combined lib/crunchrun/executor_test.go index 5b146a6321,fc9f5b36e7..ea6e610d8b --- a/lib/crunchrun/executor_test.go +++ b/lib/crunchrun/executor_test.go @@@ -6,16 -6,46 +6,20 @@@ package crunchru import ( "bytes" + "fmt" "io" - "io/ioutil" + "net" + "net/http" + "os" "strings" "time" "git.arvados.org/arvados.git/sdk/go/arvados" + "git.arvados.org/arvados.git/sdk/go/arvadostest" "golang.org/x/net/context" . "gopkg.in/check.v1" ) -func busyboxDockerImage(c *C) string { - fnm := "busybox_uclibc.tar" - cachedir := c.MkDir() - cachefile := cachedir + "/" + fnm - if _, err := os.Stat(cachefile); err == nil { - return cachefile - } - - f, err := ioutil.TempFile(cachedir, "") - c.Assert(err, IsNil) - defer f.Close() - defer os.Remove(f.Name()) - - resp, err := http.Get("https://cache.arvados.org/" + fnm) - c.Assert(err, IsNil) - defer resp.Body.Close() - _, err = io.Copy(f, resp.Body) - c.Assert(err, IsNil) - err = f.Close() - c.Assert(err, IsNil) - err = os.Rename(f.Name(), cachefile) - c.Assert(err, IsNil) - - return cachefile -} - type nopWriteCloser struct{ io.Writer } func (nopWriteCloser) Close() error { return nil } @@@ -43,7 -73,7 +47,7 @@@ func (s *executorSuite) SetUpTest(c *C Stdout: nopWriteCloser{&s.stdout}, Stderr: nopWriteCloser{&s.stderr}, } - err := s.executor.LoadImage("", busyboxDockerImage(c), arvados.Container{}, "", nil) + err := s.executor.LoadImage("", arvadostest.BusyboxDockerImage(c), arvados.Container{}, "", nil) c.Assert(err, IsNil) } @@@ -147,6 -177,91 +151,91 @@@ func (s *executorSuite) TestExecStdoutS c.Check(s.stderr.String(), Equals, "barwaz\n") } + func (s *executorSuite) TestIPAddress(c *C) { + // Listen on an available port on the host. + ln, err := net.Listen("tcp", net.JoinHostPort("0.0.0.0", "0")) + c.Assert(err, IsNil) + defer ln.Close() + _, port, err := net.SplitHostPort(ln.Addr().String()) + c.Assert(err, IsNil) + + // Start a container that listens on the same port number that + // is already in use on the host. + s.spec.Command = []string{"nc", "-l", "-p", port, "-e", "printf", `HTTP/1.1 418 I'm a teapot\r\n\r\n`} + s.spec.EnableNetwork = true + c.Assert(s.executor.Create(s.spec), IsNil) + c.Assert(s.executor.Start(), IsNil) + starttime := time.Now() + + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(10*time.Second)) + defer cancel() + + for ctx.Err() == nil { + time.Sleep(time.Second / 10) + _, err := s.executor.IPAddress() + if err == nil { + break + } + } + // When we connect to the port using s.executor.IPAddress(), + // we should reach the nc process running inside the + // container, not the net.Listen() running outside the + // container, even though both listen on the same port. + ip, err := s.executor.IPAddress() + if c.Check(err, IsNil) && c.Check(ip, Not(Equals), "") { + req, err := http.NewRequest("BREW", "http://"+net.JoinHostPort(ip, port), nil) + c.Assert(err, IsNil) + resp, err := http.DefaultClient.Do(req) + c.Assert(err, IsNil) + c.Check(resp.StatusCode, Equals, http.StatusTeapot) + } + + s.executor.Stop() + code, _ := s.executor.Wait(ctx) + c.Logf("container ran for %v", time.Now().Sub(starttime)) + c.Check(code, Equals, -1) + + c.Logf("stdout:\n%s\n\n", s.stdout.String()) + c.Logf("stderr:\n%s\n\n", s.stderr.String()) + } + + func (s *executorSuite) TestInject(c *C) { + hostdir := c.MkDir() + c.Assert(os.WriteFile(hostdir+"/testfile", []byte("first tube"), 0777), IsNil) + mountdir := fmt.Sprintf("/injecttest-%d", os.Getpid()) + s.spec.Command = []string{"sleep", "10"} + s.spec.BindMounts = map[string]bindmount{mountdir: {HostPath: hostdir, ReadOnly: true}} + c.Assert(s.executor.Create(s.spec), IsNil) + c.Assert(s.executor.Start(), IsNil) + starttime := time.Now() + + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(2*time.Second)) + defer cancel() + + // Allow InjectCommand to fail a few times while the container + // is starting + for ctx.Err() == nil { + _, err := s.executor.InjectCommand(ctx, "", "root", false, []string{"true"}) + if err == nil { + break + } + time.Sleep(time.Second / 10) + } + + injectcmd := []string{"cat", mountdir + "/testfile"} + cmd, err := s.executor.InjectCommand(ctx, "", "root", false, injectcmd) + c.Assert(err, IsNil) + out, err := cmd.CombinedOutput() + c.Logf("inject %s => %q", injectcmd, out) + c.Check(err, IsNil) + c.Check(string(out), Equals, "first tube") + + s.executor.Stop() + code, _ := s.executor.Wait(ctx) + c.Logf("container ran for %v", time.Now().Sub(starttime)) + c.Check(code, Equals, -1) + } + func (s *executorSuite) checkRun(c *C, expectCode int) { c.Assert(s.executor.Create(s.spec), IsNil) c.Assert(s.executor.Start(), IsNil) diff --combined lib/crunchrun/integration_test.go index ce92a9b807,2ba7556cb5..96ba576a49 --- a/lib/crunchrun/integration_test.go +++ b/lib/crunchrun/integration_test.go @@@ -51,7 -51,7 +51,7 @@@ func (s *integrationSuite) SetUpSuite( arvadostest.StartKeep(2, true) - out, err := exec.Command("docker", "load", "--input", busyboxDockerImage(c)).CombinedOutput() + out, err := exec.Command("docker", "load", "--input", arvadostest.BusyboxDockerImage(c)).CombinedOutput() c.Log(string(out)) c.Assert(err, IsNil) out, err = exec.Command("arv-keepdocker", "--no-resume", "busybox:uclibc").Output() @@@ -249,6 -249,9 +249,9 @@@ func (s *integrationSuite) testRunTrivi if err := exec.Command("which", s.engine).Run(); err != nil { c.Skip(fmt.Sprintf("%s: %s", s.engine, err)) } + if s.engine == "docker" && os.Getenv("ENABLE_DOCKER_TESTS") == "" { + c.Skip("docker tests temporarily disabled if ENABLE_DOCKER_TESTS is not set, see https://dev.arvados.org/issues/15370#note-31") + } s.cr.Command = []string{"sh", "-c", "cat /mnt/in/inputfile >/mnt/out/inputfile && cat /mnt/json >/mnt/out/json && ! touch /mnt/in/shouldbereadonly && mkdir /mnt/out/emptydir"} s.setup(c) diff --combined sdk/go/arvados/container.go index d6d75192f0,de709980fd..466221fe19 --- a/sdk/go/arvados/container.go +++ b/sdk/go/arvados/container.go @@@ -37,7 -37,6 +37,7 @@@ type Container struct RuntimeAuthScopes []string `json:"runtime_auth_scopes"` RuntimeToken string `json:"runtime_token"` AuthUUID string `json:"auth_uuid"` + Log string `json:"log"` } // ContainerRequest is an arvados#container_request resource. @@@ -76,6 -75,7 +76,7 @@@ type ContainerRequest struct Filters []Filter `json:"filters"` ContainerCount int `json:"container_count"` OutputStorageClasses []string `json:"output_storage_classes"` + OutputProperties map[string]interface{} `json:"output_properties"` } // Mount is special behavior to attach to a filesystem path or device.