X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/a437b5c897b7630dc7f18955242b8cd2d20b682e..3e483c4428580bf9860c76d1676b03f63d5143dc:/lib/diagnostics/cmd.go diff --git a/lib/diagnostics/cmd.go b/lib/diagnostics/cmd.go index 8d89b84d37..0fd3b3eca2 100644 --- a/lib/diagnostics/cmd.go +++ b/lib/diagnostics/cmd.go @@ -8,7 +8,9 @@ import ( "archive/tar" "bytes" "context" + "crypto/sha256" _ "embed" + "encoding/json" "flag" "fmt" "io" @@ -17,6 +19,8 @@ import ( "net/http" "net/url" "os" + "os/exec" + "regexp" "strings" "time" @@ -33,9 +37,10 @@ type Command struct{} func (Command) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int { var diag diagnoser f := flag.NewFlagSet(prog, flag.ContinueOnError) - f.StringVar(&diag.projectName, "project-name", "scratch area for diagnostics", "name of project to find/create in home project and use for temporary/test objects") - f.StringVar(&diag.logLevel, "log-level", "info", "logging level (debug, info, warning, error)") - f.StringVar(&diag.dockerImage, "docker-image", "", "image to use when running a test container (default: use embedded hello-world image)") + f.StringVar(&diag.projectName, "project-name", "scratch area for diagnostics", "`name` of project to find/create in home project and use for temporary/test objects") + f.StringVar(&diag.logLevel, "log-level", "info", "logging `level` (debug, info, warning, error)") + f.StringVar(&diag.dockerImage, "docker-image", "", "`image` (tag or portable data hash) to use when running a test container, or \"hello-world\" to use embedded hello-world image (default: build a custom image containing this executable, and run diagnostics inside the container too)") + f.StringVar(&diag.dockerImageFrom, "docker-image-from", "debian:stable-slim", "`base` image to use when building a custom image (see https://doc.arvados.org/main/admin/diagnostics.html#container-options)") f.BoolVar(&diag.checkInternal, "internal-client", false, "check that this host 
is considered an \"internal\" client") f.BoolVar(&diag.checkExternal, "external-client", false, "check that this host is considered an \"external\" client") f.BoolVar(&diag.verbose, "v", false, "verbose: include more information in report") @@ -44,6 +49,8 @@ func (Command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s if ok, code := cmd.ParseFlags(f, prog, args, "", stderr); !ok { return code } + diag.stdout = stdout + diag.stderr = stderr diag.logger = ctxlog.New(stdout, "text", diag.logLevel) diag.logger.SetFormatter(&logrus.TextFormatter{DisableTimestamp: true, DisableLevelTruncation: true, PadLevelText: true}) diag.runtests() @@ -67,19 +74,20 @@ func (Command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s var HelloWorldDockerImage []byte type diagnoser struct { - stdout io.Writer - stderr io.Writer - logLevel string - priority int - projectName string - dockerImage string - checkInternal bool - checkExternal bool - verbose bool - timeout time.Duration - logger *logrus.Logger - errors []string - done map[int]bool + stdout io.Writer + stderr io.Writer + logLevel string + priority int + projectName string + dockerImage string + dockerImageFrom string + checkInternal bool + checkExternal bool + verbose bool + timeout time.Duration + logger *logrus.Logger + errors []string + done map[int]bool } func (diag *diagnoser) debugf(f string, args ...interface{}) { @@ -444,38 +452,100 @@ func (diag *diagnoser) runtests() { }() } - // Read hello-world.tar to find image ID, so we can upload it - // as "sha256:{...}.tar" + tempdir, err := ioutil.TempDir("", "arvados-diagnostics") + if err != nil { + diag.errorf("error creating temp dir: %s", err) + return + } + defer os.RemoveAll(tempdir) + var imageSHA2 string - { - tr := tar.NewReader(bytes.NewReader(HelloWorldDockerImage)) - for { - hdr, err := tr.Next() - if err == io.EOF { - break - } + var dockerImageData []byte + if diag.dockerImage != "" || diag.priority < 1 { + // We won't be 
using the self-built docker image, so + // don't build it. But we will write the embedded + // "hello-world" image to our test collection to test + // upload/download, whether or not we're using it as a + // docker image. + dockerImageData = HelloWorldDockerImage + + if diag.priority > 0 { + imageSHA2, err = getSHA2FromImageData(dockerImageData) if err != nil { - diag.errorf("internal error/bug: cannot read embedded docker image tar file: %s", err) + diag.errorf("internal error/bug: %s", err) return } - if s := strings.TrimSuffix(hdr.Name, ".json"); len(s) == 64 && s != hdr.Name { - imageSHA2 = s - } } - if imageSHA2 == "" { - diag.errorf("internal error/bug: cannot find {sha256}.json file in embedded docker image tar file") + } else if selfbin, err := os.Readlink("/proc/self/exe"); err != nil { + diag.errorf("readlink /proc/self/exe: %s", err) + return + } else if selfbindata, err := os.ReadFile(selfbin); err != nil { + diag.errorf("error reading %s: %s", selfbin, err) + return + } else { + selfbinSha := fmt.Sprintf("%x", sha256.Sum256(selfbindata)) + tag := "arvados-client-diagnostics:" + selfbinSha[:9] + err := os.WriteFile(tempdir+"/arvados-client", selfbindata, 0777) + if err != nil { + diag.errorf("error writing %s: %s", tempdir+"/arvados-client", err) + return + } + + dockerfile := "FROM " + diag.dockerImageFrom + "\n" + dockerfile += "RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install --yes --no-install-recommends libfuse2 ca-certificates && apt-get clean\n" + dockerfile += "COPY /arvados-client /arvados-client\n" + cmd := exec.Command("docker", "build", "--tag", tag, "-f", "-", tempdir) + cmd.Stdin = strings.NewReader(dockerfile) + cmd.Stdout = diag.stderr + cmd.Stderr = diag.stderr + err = cmd.Run() + if err != nil { + diag.errorf("error building docker image: %s", err) + return + } + checkversion, err := exec.Command("docker", "run", tag, "/arvados-client", "version").CombinedOutput() + if err != nil { + diag.errorf("docker image does 
not seem to work: %s", err) + return + } + diag.infof("arvados-client version: %s", checkversion) + + buf, err := exec.Command("docker", "inspect", "--format={{.Id}}", tag).Output() + if err != nil { + diag.errorf("docker inspect --format={{.Id}} %s: %s", tag, err) + return + } + imageSHA2 = min64HexDigits.FindString(string(buf)) + if len(imageSHA2) != 64 { + diag.errorf("docker inspect --format={{.Id}} output %q does not seem to contain sha256 digest", buf) + return + } + + buf, err = exec.Command("docker", "save", tag).Output() + if err != nil { + diag.errorf("docker save %s: %s", tag, err) return } + diag.infof("docker image size is %d", len(buf)) + dockerImageData = buf } + tarfilename := "sha256:" + imageSHA2 + ".tar" diag.dotest(100, "uploading file via webdav", func() error { - ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout)) + timeout := diag.timeout + if len(dockerImageData) > 10<<20 && timeout < time.Minute { + // Extend the normal http timeout if we're + // uploading a substantial docker image. + timeout = time.Minute + } + ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(timeout)) defer cancel() if collection.UUID == "" { return fmt.Errorf("skipping, no test collection") } - req, err := http.NewRequestWithContext(ctx, "PUT", cluster.Services.WebDAVDownload.ExternalURL.String()+"c="+collection.UUID+"/"+tarfilename, bytes.NewReader(HelloWorldDockerImage)) + t0 := time.Now() + req, err := http.NewRequestWithContext(ctx, "PUT", cluster.Services.WebDAVDownload.ExternalURL.String()+"c="+collection.UUID+"/"+tarfilename, bytes.NewReader(dockerImageData)) if err != nil { return fmt.Errorf("BUG? 
http.NewRequest: %s", err) } @@ -488,12 +558,12 @@ func (diag *diagnoser) runtests() { if resp.StatusCode != http.StatusCreated { return fmt.Errorf("status %s", resp.Status) } - diag.debugf("ok, status %s", resp.Status) + diag.verbosef("upload ok, status %s, %f MB/s", resp.Status, float64(len(dockerImageData))/time.Since(t0).Seconds()/1000000) err = client.RequestAndDecodeContext(ctx, &collection, "GET", "arvados/v1/collections/"+collection.UUID, nil, nil) if err != nil { return fmt.Errorf("get updated collection: %s", err) } - diag.debugf("ok, pdh %s", collection.PortableDataHash) + diag.verbosef("upload pdh %s", collection.PortableDataHash) return nil }) @@ -549,7 +619,7 @@ func (diag *diagnoser) runtests() { if resp.StatusCode != trial.status { return fmt.Errorf("unexpected response status: %s", resp.Status) } - if trial.status == http.StatusOK && !bytes.Equal(body, HelloWorldDockerImage) { + if trial.status == http.StatusOK && !bytes.Equal(body, dockerImageData) { excerpt := body if len(excerpt) > 128 { excerpt = append([]byte(nil), body[:128]...) 
@@ -578,35 +648,6 @@ func (diag *diagnoser) runtests() { return nil }) - diag.dotest(140, "getting workbench1 webshell page", func() error { - ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout)) - defer cancel() - if vm.UUID == "" { - diag.warnf("skipping, no vm available") - return nil - } - webshelltermurl := cluster.Services.Workbench1.ExternalURL.String() + "virtual_machines/" + vm.UUID + "/webshell/testusername" - diag.debugf("url %s", webshelltermurl) - req, err := http.NewRequestWithContext(ctx, "GET", webshelltermurl, nil) - if err != nil { - return err - } - req.Header.Set("Authorization", "Bearer "+client.AuthToken) - resp, err := http.DefaultClient.Do(req) - if err != nil { - return err - } - defer resp.Body.Close() - body, err := ioutil.ReadAll(resp.Body) - if err != nil { - return fmt.Errorf("reading response: %s", err) - } - if resp.StatusCode != http.StatusOK { - return fmt.Errorf("unexpected response status: %s %q", resp.Status, body) - } - return nil - }) - diag.dotest(150, "connecting to webshell service", func() error { ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout)) defer cancel() @@ -662,13 +703,26 @@ func (diag *diagnoser) runtests() { } timestamp := time.Now().Format(time.RFC3339) - ctrCommand := []string{"echo", timestamp} - if diag.dockerImage == "" { + + var ctrCommand []string + switch diag.dockerImage { + case "": + if collection.UUID == "" { + return fmt.Errorf("skipping, no test collection to use as docker image") + } + diag.dockerImage = collection.PortableDataHash + ctrCommand = []string{"/arvados-client", "diagnostics", + "-priority=0", // don't run a container + "-log-level=" + diag.logLevel, + "-internal-client=true"} + case "hello-world": if collection.UUID == "" { return fmt.Errorf("skipping, no test collection to use as docker image") } diag.dockerImage = collection.PortableDataHash ctrCommand = []string{"/hello"} + default: + ctrCommand = 
// getSHA2FromImageData scans the tar archive in dockerImageData for
// an entry named "manifest.json", decodes it as a JSON array of
// objects, and returns the sha256 digest (64 lowercase hex digits)
// found in the Config field of the first array element.
//
// The Config value may carry the digest inside a longer string (for
// example with a path prefix or a ".json" suffix); the first run of
// hex digits is extracted, and it must be exactly 64 digits long.
//
// An error is returned when:
//   - the archive cannot be read (the tar error is wrapped),
//   - the archive ends without a manifest.json entry,
//   - manifest.json is not valid JSON of the expected shape (the JSON
//     error is wrapped),
//   - the manifest array is empty, or
//   - the first Config value does not contain a 64-digit hex string.
//
// Underlying errors are wrapped with %w so callers can inspect them
// with errors.Is / errors.As; the rendered messages are unchanged.
func getSHA2FromImageData(dockerImageData []byte) (string, error) {
	tr := tar.NewReader(bytes.NewReader(dockerImageData))
	for {
		hdr, err := tr.Next()
		if err == io.EOF {
			// Reached the end of the archive without ever
			// seeing a manifest.json entry.
			return "", fmt.Errorf("cannot find manifest.json in docker image tar file")
		}
		if err != nil {
			return "", fmt.Errorf("cannot read docker image tar file: %w", err)
		}
		if hdr.Name != "manifest.json" {
			continue
		}
		// Only the Config field is needed; any other fields in
		// the manifest entries are ignored by the decoder.
		var manifest []struct {
			Config string
		}
		// The tar reader is positioned at this entry's content,
		// so it can be decoded directly as a stream.
		err = json.NewDecoder(tr).Decode(&manifest)
		if err != nil {
			return "", fmt.Errorf("cannot read manifest.json from docker image tar file: %w", err)
		}
		if len(manifest) == 0 {
			return "", fmt.Errorf("manifest.json is empty")
		}
		// FindString returns the leftmost run of 64 or more hex
		// digits; a run longer than 64 is rejected by the length
		// check because it cannot be a sha256 digest.
		s := min64HexDigits.FindString(manifest[0].Config)
		if len(s) != 64 {
			return "", fmt.Errorf("found manifest.json but .[0].Config %q does not seem to contain sha256 digest", manifest[0].Config)
		}
		return s, nil
	}
}

// min64HexDigits matches a run of at least 64 lowercase hex digits.
// Callers check that the match is exactly 64 characters long before
// treating it as a sha256 digest.
var min64HexDigits = regexp.MustCompile(`[0-9a-f]{64,}`)