c944db66607d58a612cf2febdc17816477786d42
[arvados.git] / lib / diagnostics / cmd.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package diagnostics
6
7 import (
8         "archive/tar"
9         "bytes"
10         "context"
11         _ "embed"
12         "flag"
13         "fmt"
14         "io"
15         "io/ioutil"
16         "net"
17         "net/http"
18         "net/url"
19         "os"
20         "strings"
21         "time"
22
23         "git.arvados.org/arvados.git/lib/cmd"
24         "git.arvados.org/arvados.git/lib/config"
25         "git.arvados.org/arvados.git/sdk/go/arvados"
26         "git.arvados.org/arvados.git/sdk/go/ctxlog"
27         "git.arvados.org/arvados.git/sdk/go/health"
28         "github.com/sirupsen/logrus"
29 )
30
31 type Command struct{}
32
33 func (Command) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
34         var diag diagnoser
35         f := flag.NewFlagSet(prog, flag.ContinueOnError)
36         f.StringVar(&diag.projectName, "project-name", "scratch area for diagnostics", "name of project to find/create in home project and use for temporary/test objects")
37         f.StringVar(&diag.logLevel, "log-level", "info", "logging level (debug, info, warning, error)")
38         f.StringVar(&diag.dockerImage, "docker-image", "", "image to use when running a test container (default: use embedded hello-world image)")
39         f.BoolVar(&diag.checkInternal, "internal-client", false, "check that this host is considered an \"internal\" client")
40         f.BoolVar(&diag.checkExternal, "external-client", false, "check that this host is considered an \"external\" client")
41         f.BoolVar(&diag.verbose, "v", false, "verbose: include more information in report")
42         f.IntVar(&diag.priority, "priority", 500, "priority for test container (1..1000, or 0 to skip)")
43         f.DurationVar(&diag.timeout, "timeout", 10*time.Second, "timeout for http requests")
44         if ok, code := cmd.ParseFlags(f, prog, args, "", stderr); !ok {
45                 return code
46         }
47         diag.logger = ctxlog.New(stdout, "text", diag.logLevel)
48         diag.logger.SetFormatter(&logrus.TextFormatter{DisableTimestamp: true, DisableLevelTruncation: true, PadLevelText: true})
49         diag.runtests()
50         if len(diag.errors) == 0 {
51                 diag.logger.Info("--- no errors ---")
52                 return 0
53         } else {
54                 if diag.logger.Level > logrus.ErrorLevel {
55                         fmt.Fprint(stdout, "\n--- cut here --- error summary ---\n\n")
56                         for _, e := range diag.errors {
57                                 diag.logger.Error(e)
58                         }
59                 }
60                 return 1
61         }
62 }
63
// HelloWorldDockerImage is the content of hello-world.tar, embedded
// in the program at build time. The tar file is generated with:
//
//	docker save hello-world > hello-world.tar
//
//go:embed hello-world.tar
var HelloWorldDockerImage []byte
68
69 type diagnoser struct {
70         stdout        io.Writer
71         stderr        io.Writer
72         logLevel      string
73         priority      int
74         projectName   string
75         dockerImage   string
76         checkInternal bool
77         checkExternal bool
78         verbose       bool
79         timeout       time.Duration
80         logger        *logrus.Logger
81         errors        []string
82         done          map[int]bool
83 }
84
85 func (diag *diagnoser) debugf(f string, args ...interface{}) {
86         diag.logger.Debugf("  ... "+f, args...)
87 }
88
89 func (diag *diagnoser) infof(f string, args ...interface{}) {
90         diag.logger.Infof("  ... "+f, args...)
91 }
92
93 func (diag *diagnoser) verbosef(f string, args ...interface{}) {
94         if diag.verbose {
95                 diag.logger.Infof("  ... "+f, args...)
96         }
97 }
98
99 func (diag *diagnoser) warnf(f string, args ...interface{}) {
100         diag.logger.Warnf("  ... "+f, args...)
101 }
102
103 func (diag *diagnoser) errorf(f string, args ...interface{}) {
104         diag.logger.Errorf(f, args...)
105         diag.errors = append(diag.errors, fmt.Sprintf(f, args...))
106 }
107
108 // Run the given func, logging appropriate messages before and after,
109 // adding timing info, etc.
110 //
111 // The id argument should be unique among tests, and shouldn't change
112 // when other tests are added/removed.
113 func (diag *diagnoser) dotest(id int, title string, fn func() error) {
114         if diag.done == nil {
115                 diag.done = map[int]bool{}
116         } else if diag.done[id] {
117                 diag.errorf("(bug) reused test id %d", id)
118         }
119         diag.done[id] = true
120
121         diag.logger.Infof("%4d: %s", id, title)
122         t0 := time.Now()
123         err := fn()
124         elapsed := fmt.Sprintf("%d ms", time.Now().Sub(t0)/time.Millisecond)
125         if err != nil {
126                 diag.errorf("%4d: %s (%s): %s", id, title, elapsed, err)
127         } else {
128                 diag.logger.Debugf("%4d: %s (%s): ok", id, title, elapsed)
129         }
130 }
131
132 func (diag *diagnoser) runtests() {
133         client := arvados.NewClientFromEnv()
134
135         if client.APIHost == "" || client.AuthToken == "" {
136                 diag.errorf("ARVADOS_API_HOST and ARVADOS_API_TOKEN environment variables are not set -- aborting without running any tests")
137                 return
138         }
139
140         hostname, err := os.Hostname()
141         if err != nil {
142                 diag.warnf("error getting hostname: %s")
143         } else {
144                 diag.verbosef("hostname = %s", hostname)
145         }
146
147         diag.dotest(5, "running health check (same as `arvados-server check`)", func() error {
148                 ldr := config.NewLoader(&bytes.Buffer{}, ctxlog.New(&bytes.Buffer{}, "text", "info"))
149                 ldr.SetupFlags(flag.NewFlagSet("diagnostics", flag.ContinueOnError))
150                 cfg, err := ldr.Load()
151                 if err != nil {
152                         diag.infof("skipping because config could not be loaded: %s", err)
153                         return nil
154                 }
155                 cluster, err := cfg.GetCluster("")
156                 if err != nil {
157                         return err
158                 }
159                 if cluster.SystemRootToken != os.Getenv("ARVADOS_API_TOKEN") {
160                         diag.infof("skipping because SystemRootToken read from %s does not match $ARVADOS_API_TOKEN (consider using 'arvados-client sudo diagnostics' to load endpoint and token from config file instead of environment)", ldr.Path)
161                         return nil
162                 }
163                 agg := &health.Aggregator{Cluster: cluster}
164                 resp := agg.ClusterHealth()
165                 for _, e := range resp.Errors {
166                         diag.errorf("health check: %s", e)
167                 }
168                 if len(resp.Errors) > 0 {
169                         diag.infof("consider running `arvados-server check -yaml` for a comprehensive report")
170                 }
171                 diag.verbosef("reported clock skew = %v", resp.ClockSkew)
172                 reported := map[string]bool{}
173                 for _, result := range resp.Checks {
174                         version := strings.SplitN(result.Metrics.Version, " (go", 2)[0]
175                         if version != "" && !reported[version] {
176                                 diag.verbosef("arvados version = %s", version)
177                                 reported[version] = true
178                         }
179                 }
180                 reported = map[string]bool{}
181                 for _, result := range resp.Checks {
182                         if result.Server != "" && !reported[result.Server] {
183                                 diag.verbosef("http frontend version = %s", result.Server)
184                                 reported[result.Server] = true
185                         }
186                 }
187                 reported = map[string]bool{}
188                 for _, result := range resp.Checks {
189                         if sha := result.ConfigSourceSHA256; sha != "" && !reported[sha] {
190                                 diag.verbosef("config file sha256 = %s", sha)
191                                 reported[sha] = true
192                         }
193                 }
194                 return nil
195         })
196
197         var dd arvados.DiscoveryDocument
198         ddpath := "discovery/v1/apis/arvados/v1/rest"
199         diag.dotest(10, fmt.Sprintf("getting discovery document from https://%s/%s", client.APIHost, ddpath), func() error {
200                 ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
201                 defer cancel()
202                 err := client.RequestAndDecodeContext(ctx, &dd, "GET", ddpath, nil, nil)
203                 if err != nil {
204                         return err
205                 }
206                 diag.verbosef("BlobSignatureTTL = %d", dd.BlobSignatureTTL)
207                 return nil
208         })
209
210         var cluster arvados.Cluster
211         cfgpath := "arvados/v1/config"
212         cfgOK := false
213         diag.dotest(20, fmt.Sprintf("getting exported config from https://%s/%s", client.APIHost, cfgpath), func() error {
214                 ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
215                 defer cancel()
216                 err := client.RequestAndDecodeContext(ctx, &cluster, "GET", cfgpath, nil, nil)
217                 if err != nil {
218                         return err
219                 }
220                 diag.verbosef("Collections.BlobSigning = %v", cluster.Collections.BlobSigning)
221                 cfgOK = true
222                 return nil
223         })
224
225         var user arvados.User
226         diag.dotest(30, "getting current user record", func() error {
227                 ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
228                 defer cancel()
229                 err := client.RequestAndDecodeContext(ctx, &user, "GET", "arvados/v1/users/current", nil, nil)
230                 if err != nil {
231                         return err
232                 }
233                 diag.verbosef("user uuid = %s", user.UUID)
234                 return nil
235         })
236
237         if !cfgOK {
238                 diag.errorf("cannot proceed without cluster config -- aborting without running any further tests")
239                 return
240         }
241
242         // uncomment to create some spurious errors
243         // cluster.Services.WebDAVDownload.ExternalURL.Host = "0.0.0.0:9"
244
245         // TODO: detect routing errors here, like finding wb2 at the
246         // wb1 address.
247         for i, svc := range []*arvados.Service{
248                 &cluster.Services.Keepproxy,
249                 &cluster.Services.WebDAV,
250                 &cluster.Services.WebDAVDownload,
251                 &cluster.Services.Websocket,
252                 &cluster.Services.Workbench1,
253                 &cluster.Services.Workbench2,
254         } {
255                 diag.dotest(40+i, fmt.Sprintf("connecting to service endpoint %s", svc.ExternalURL), func() error {
256                         ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
257                         defer cancel()
258                         u := svc.ExternalURL
259                         if strings.HasPrefix(u.Scheme, "ws") {
260                                 // We can do a real websocket test elsewhere,
261                                 // but for now we'll just check the https
262                                 // connection.
263                                 u.Scheme = "http" + u.Scheme[2:]
264                         }
265                         if svc == &cluster.Services.WebDAV && strings.HasPrefix(u.Host, "*") {
266                                 u.Host = "d41d8cd98f00b204e9800998ecf8427e-0" + u.Host[1:]
267                         }
268                         req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil)
269                         if err != nil {
270                                 return err
271                         }
272                         resp, err := http.DefaultClient.Do(req)
273                         if err != nil {
274                                 return err
275                         }
276                         resp.Body.Close()
277                         return nil
278                 })
279         }
280
281         for i, url := range []string{
282                 cluster.Services.Controller.ExternalURL.String(),
283                 cluster.Services.Keepproxy.ExternalURL.String() + "d41d8cd98f00b204e9800998ecf8427e+0",
284                 cluster.Services.WebDAVDownload.ExternalURL.String(),
285         } {
286                 diag.dotest(50+i, fmt.Sprintf("checking CORS headers at %s", url), func() error {
287                         ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
288                         defer cancel()
289                         req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
290                         if err != nil {
291                                 return err
292                         }
293                         req.Header.Set("Origin", "https://example.com")
294                         resp, err := http.DefaultClient.Do(req)
295                         if err != nil {
296                                 return err
297                         }
298                         if hdr := resp.Header.Get("Access-Control-Allow-Origin"); hdr != "*" {
299                                 return fmt.Errorf("expected \"Access-Control-Allow-Origin: *\", got %q", hdr)
300                         }
301                         return nil
302                 })
303         }
304
305         var keeplist arvados.KeepServiceList
306         diag.dotest(60, "checking internal/external client detection", func() error {
307                 ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
308                 defer cancel()
309                 err := client.RequestAndDecodeContext(ctx, &keeplist, "GET", "arvados/v1/keep_services/accessible", nil, arvados.ListOptions{Limit: 999999})
310                 if err != nil {
311                         return fmt.Errorf("error getting keep services list: %s", err)
312                 } else if len(keeplist.Items) == 0 {
313                         return fmt.Errorf("controller did not return any keep services")
314                 }
315                 found := map[string]int{}
316                 for _, ks := range keeplist.Items {
317                         found[ks.ServiceType]++
318                 }
319                 isInternal := found["proxy"] == 0 && len(keeplist.Items) > 0
320                 isExternal := found["proxy"] > 0 && found["proxy"] == len(keeplist.Items)
321                 if isExternal {
322                         diag.verbosef("controller returned only proxy services, this host is treated as \"external\"")
323                 } else if isInternal {
324                         diag.verbosef("controller returned only non-proxy services, this host is treated as \"internal\"")
325                 }
326                 if (diag.checkInternal && !isInternal) || (diag.checkExternal && !isExternal) {
327                         return fmt.Errorf("expecting internal=%v external=%v, but found internal=%v external=%v", diag.checkInternal, diag.checkExternal, isInternal, isExternal)
328                 }
329                 return nil
330         })
331
332         for i, ks := range keeplist.Items {
333                 u := url.URL{
334                         Scheme: "http",
335                         Host:   net.JoinHostPort(ks.ServiceHost, fmt.Sprintf("%d", ks.ServicePort)),
336                         Path:   "/",
337                 }
338                 if ks.ServiceSSLFlag {
339                         u.Scheme = "https"
340                 }
341                 diag.dotest(61+i, fmt.Sprintf("reading+writing via keep service at %s", u.String()), func() error {
342                         ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
343                         defer cancel()
344                         req, err := http.NewRequestWithContext(ctx, "PUT", u.String()+"d41d8cd98f00b204e9800998ecf8427e", nil)
345                         if err != nil {
346                                 return err
347                         }
348                         req.Header.Set("Authorization", "Bearer "+client.AuthToken)
349                         resp, err := http.DefaultClient.Do(req)
350                         if err != nil {
351                                 return err
352                         }
353                         defer resp.Body.Close()
354                         body, err := ioutil.ReadAll(resp.Body)
355                         if err != nil {
356                                 return fmt.Errorf("reading response body: %s", err)
357                         }
358                         loc := strings.TrimSpace(string(body))
359                         if !strings.HasPrefix(loc, "d41d8") {
360                                 return fmt.Errorf("unexpected response from write: %q", body)
361                         }
362
363                         req, err = http.NewRequestWithContext(ctx, "GET", u.String()+loc, nil)
364                         if err != nil {
365                                 return err
366                         }
367                         req.Header.Set("Authorization", "Bearer "+client.AuthToken)
368                         resp, err = http.DefaultClient.Do(req)
369                         if err != nil {
370                                 return err
371                         }
372                         defer resp.Body.Close()
373                         body, err = ioutil.ReadAll(resp.Body)
374                         if err != nil {
375                                 return fmt.Errorf("reading response body: %s", err)
376                         }
377                         if len(body) != 0 {
378                                 return fmt.Errorf("unexpected response from read: %q", body)
379                         }
380
381                         return nil
382                 })
383         }
384
385         var project arvados.Group
386         diag.dotest(80, fmt.Sprintf("finding/creating %q project", diag.projectName), func() error {
387                 ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
388                 defer cancel()
389                 var grplist arvados.GroupList
390                 err := client.RequestAndDecodeContext(ctx, &grplist, "GET", "arvados/v1/groups", nil, arvados.ListOptions{
391                         Filters: []arvados.Filter{
392                                 {"name", "=", diag.projectName},
393                                 {"group_class", "=", "project"},
394                                 {"owner_uuid", "=", user.UUID}},
395                         Limit: 999999})
396                 if err != nil {
397                         return fmt.Errorf("list groups: %s", err)
398                 }
399                 if len(grplist.Items) > 0 {
400                         project = grplist.Items[0]
401                         diag.verbosef("using existing project, uuid = %s", project.UUID)
402                         return nil
403                 }
404                 diag.debugf("list groups: ok, no results")
405                 err = client.RequestAndDecodeContext(ctx, &project, "POST", "arvados/v1/groups", nil, map[string]interface{}{"group": map[string]interface{}{
406                         "name":        diag.projectName,
407                         "group_class": "project",
408                 }})
409                 if err != nil {
410                         return fmt.Errorf("create project: %s", err)
411                 }
412                 diag.verbosef("created project, uuid = %s", project.UUID)
413                 return nil
414         })
415
416         var collection arvados.Collection
417         diag.dotest(90, "creating temporary collection", func() error {
418                 if project.UUID == "" {
419                         return fmt.Errorf("skipping, no project to work in")
420                 }
421                 ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
422                 defer cancel()
423                 err := client.RequestAndDecodeContext(ctx, &collection, "POST", "arvados/v1/collections", nil, map[string]interface{}{
424                         "ensure_unique_name": true,
425                         "collection": map[string]interface{}{
426                                 "owner_uuid": project.UUID,
427                                 "name":       "test collection",
428                                 "trash_at":   time.Now().Add(time.Hour)}})
429                 if err != nil {
430                         return err
431                 }
432                 diag.verbosef("ok, uuid = %s", collection.UUID)
433                 return nil
434         })
435
436         if collection.UUID != "" {
437                 defer func() {
438                         diag.dotest(9990, "deleting temporary collection", func() error {
439                                 ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
440                                 defer cancel()
441                                 return client.RequestAndDecodeContext(ctx, nil, "DELETE", "arvados/v1/collections/"+collection.UUID, nil, nil)
442                         })
443                 }()
444         }
445
446         // Read hello-world.tar to find image ID, so we can upload it
447         // as "sha256:{...}.tar"
448         var imageSHA2 string
449         {
450                 tr := tar.NewReader(bytes.NewReader(HelloWorldDockerImage))
451                 for {
452                         hdr, err := tr.Next()
453                         if err == io.EOF {
454                                 break
455                         }
456                         if err != nil {
457                                 diag.errorf("internal error/bug: cannot read embedded docker image tar file: %s", err)
458                                 return
459                         }
460                         if s := strings.TrimSuffix(hdr.Name, ".json"); len(s) == 64 && s != hdr.Name {
461                                 imageSHA2 = s
462                         }
463                 }
464                 if imageSHA2 == "" {
465                         diag.errorf("internal error/bug: cannot find {sha256}.json file in embedded docker image tar file")
466                         return
467                 }
468         }
469         tarfilename := "sha256:" + imageSHA2 + ".tar"
470
471         diag.dotest(100, "uploading file via webdav", func() error {
472                 ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
473                 defer cancel()
474                 if collection.UUID == "" {
475                         return fmt.Errorf("skipping, no test collection")
476                 }
477                 req, err := http.NewRequestWithContext(ctx, "PUT", cluster.Services.WebDAVDownload.ExternalURL.String()+"c="+collection.UUID+"/"+tarfilename, bytes.NewReader(HelloWorldDockerImage))
478                 if err != nil {
479                         return fmt.Errorf("BUG? http.NewRequest: %s", err)
480                 }
481                 req.Header.Set("Authorization", "Bearer "+client.AuthToken)
482                 resp, err := http.DefaultClient.Do(req)
483                 if err != nil {
484                         return fmt.Errorf("error performing http request: %s", err)
485                 }
486                 resp.Body.Close()
487                 if resp.StatusCode != http.StatusCreated {
488                         return fmt.Errorf("status %s", resp.Status)
489                 }
490                 diag.debugf("ok, status %s", resp.Status)
491                 err = client.RequestAndDecodeContext(ctx, &collection, "GET", "arvados/v1/collections/"+collection.UUID, nil, nil)
492                 if err != nil {
493                         return fmt.Errorf("get updated collection: %s", err)
494                 }
495                 diag.debugf("ok, pdh %s", collection.PortableDataHash)
496                 return nil
497         })
498
499         davurl := cluster.Services.WebDAV.ExternalURL
500         davWildcard := strings.HasPrefix(davurl.Host, "*--") || strings.HasPrefix(davurl.Host, "*.")
501         diag.dotest(110, fmt.Sprintf("checking WebDAV ExternalURL wildcard (%s)", davurl), func() error {
502                 if davurl.Host == "" {
503                         return fmt.Errorf("host missing - content previews will not work")
504                 }
505                 if !davWildcard && !cluster.Collections.TrustAllContent {
506                         diag.warnf("WebDAV ExternalURL has no leading wildcard and TrustAllContent==false - content previews will not work")
507                 }
508                 return nil
509         })
510
511         for i, trial := range []struct {
512                 needcoll     bool
513                 needWildcard bool
514                 status       int
515                 fileurl      string
516         }{
517                 {false, false, http.StatusNotFound, strings.Replace(davurl.String(), "*", "d41d8cd98f00b204e9800998ecf8427e-0", 1) + "foo"},
518                 {false, false, http.StatusNotFound, strings.Replace(davurl.String(), "*", "d41d8cd98f00b204e9800998ecf8427e-0", 1) + tarfilename},
519                 {false, false, http.StatusNotFound, cluster.Services.WebDAVDownload.ExternalURL.String() + "c=d41d8cd98f00b204e9800998ecf8427e+0/_/foo"},
520                 {false, false, http.StatusNotFound, cluster.Services.WebDAVDownload.ExternalURL.String() + "c=d41d8cd98f00b204e9800998ecf8427e+0/_/" + tarfilename},
521                 {true, true, http.StatusOK, strings.Replace(davurl.String(), "*", strings.Replace(collection.PortableDataHash, "+", "-", -1), 1) + tarfilename},
522                 {true, false, http.StatusOK, cluster.Services.WebDAVDownload.ExternalURL.String() + "c=" + collection.UUID + "/_/" + tarfilename},
523         } {
524                 diag.dotest(120+i, fmt.Sprintf("downloading from webdav (%s)", trial.fileurl), func() error {
525                         if trial.needWildcard && !davWildcard {
526                                 diag.warnf("skipping collection-id-in-vhost test because WebDAV ExternalURL has no leading wildcard")
527                                 return nil
528                         }
529                         ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
530                         defer cancel()
531                         if trial.needcoll && collection.UUID == "" {
532                                 return fmt.Errorf("skipping, no test collection")
533                         }
534                         req, err := http.NewRequestWithContext(ctx, "GET", trial.fileurl, nil)
535                         if err != nil {
536                                 return err
537                         }
538                         req.Header.Set("Authorization", "Bearer "+client.AuthToken)
539                         resp, err := http.DefaultClient.Do(req)
540                         if err != nil {
541                                 return err
542                         }
543                         defer resp.Body.Close()
544                         body, err := ioutil.ReadAll(resp.Body)
545                         if err != nil {
546                                 return fmt.Errorf("reading response: %s", err)
547                         }
548                         if resp.StatusCode != trial.status {
549                                 return fmt.Errorf("unexpected response status: %s", resp.Status)
550                         }
551                         if trial.status == http.StatusOK && !bytes.Equal(body, HelloWorldDockerImage) {
552                                 excerpt := body
553                                 if len(excerpt) > 128 {
554                                         excerpt = append([]byte(nil), body[:128]...)
555                                         excerpt = append(excerpt, []byte("[...]")...)
556                                 }
557                                 return fmt.Errorf("unexpected response content: len %d, %q", len(body), excerpt)
558                         }
559                         return nil
560                 })
561         }
562
563         var vm arvados.VirtualMachine
564         diag.dotest(130, "getting list of virtual machines", func() error {
565                 ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
566                 defer cancel()
567                 var vmlist arvados.VirtualMachineList
568                 err := client.RequestAndDecodeContext(ctx, &vmlist, "GET", "arvados/v1/virtual_machines", nil, arvados.ListOptions{Limit: 999999})
569                 if err != nil {
570                         return err
571                 }
572                 if len(vmlist.Items) < 1 {
573                         diag.warnf("no VMs found")
574                 } else {
575                         vm = vmlist.Items[0]
576                 }
577                 return nil
578         })
579
580         diag.dotest(140, "getting workbench1 webshell page", func() error {
581                 ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
582                 defer cancel()
583                 if vm.UUID == "" {
584                         diag.warnf("skipping, no vm available")
585                         return nil
586                 }
587                 webshelltermurl := cluster.Services.Workbench1.ExternalURL.String() + "virtual_machines/" + vm.UUID + "/webshell/testusername"
588                 diag.debugf("url %s", webshelltermurl)
589                 req, err := http.NewRequestWithContext(ctx, "GET", webshelltermurl, nil)
590                 if err != nil {
591                         return err
592                 }
593                 req.Header.Set("Authorization", "Bearer "+client.AuthToken)
594                 resp, err := http.DefaultClient.Do(req)
595                 if err != nil {
596                         return err
597                 }
598                 defer resp.Body.Close()
599                 body, err := ioutil.ReadAll(resp.Body)
600                 if err != nil {
601                         return fmt.Errorf("reading response: %s", err)
602                 }
603                 if resp.StatusCode != http.StatusOK {
604                         return fmt.Errorf("unexpected response status: %s %q", resp.Status, body)
605                 }
606                 return nil
607         })
608
609         diag.dotest(150, "connecting to webshell service", func() error {
610                 ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
611                 defer cancel()
612                 if vm.UUID == "" {
613                         diag.warnf("skipping, no vm available")
614                         return nil
615                 }
616                 u := cluster.Services.WebShell.ExternalURL
617                 webshellurl := u.String() + vm.Hostname + "?"
618                 if strings.HasPrefix(u.Host, "*") {
619                         u.Host = vm.Hostname + u.Host[1:]
620                         webshellurl = u.String() + "?"
621                 }
622                 diag.debugf("url %s", webshellurl)
623                 req, err := http.NewRequestWithContext(ctx, "POST", webshellurl, bytes.NewBufferString(url.Values{
624                         "width":   {"80"},
625                         "height":  {"25"},
626                         "session": {"xyzzy"},
627                         "rooturl": {webshellurl},
628                 }.Encode()))
629                 if err != nil {
630                         return err
631                 }
632                 req.Header.Set("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8")
633                 resp, err := http.DefaultClient.Do(req)
634                 if err != nil {
635                         return err
636                 }
637                 defer resp.Body.Close()
638                 diag.debugf("response status %s", resp.Status)
639                 body, err := ioutil.ReadAll(resp.Body)
640                 if err != nil {
641                         return fmt.Errorf("reading response: %s", err)
642                 }
643                 diag.debugf("response body %q", body)
644                 // We don't speak the protocol, so we get a 400 error
645                 // from the webshell server even if everything is
646                 // OK. Anything else (404, 502, ???) indicates a
647                 // problem.
648                 if resp.StatusCode != http.StatusBadRequest {
649                         return fmt.Errorf("unexpected response status: %s, %q", resp.Status, body)
650                 }
651                 return nil
652         })
653
654         diag.dotest(160, "running a container", func() error {
655                 if diag.priority < 1 {
656                         diag.infof("skipping (use priority > 0 if you want to run a container)")
657                         return nil
658                 }
659                 if project.UUID == "" {
660                         return fmt.Errorf("skipping, no project to work in")
661                 }
662
663                 timestamp := time.Now().Format(time.RFC3339)
664                 ctrCommand := []string{"echo", timestamp}
665                 if diag.dockerImage == "" {
666                         if collection.UUID == "" {
667                                 return fmt.Errorf("skipping, no test collection to use as docker image")
668                         }
669                         diag.dockerImage = collection.PortableDataHash
670                         ctrCommand = []string{"/hello"}
671                 }
672
673                 var cr arvados.ContainerRequest
674                 ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
675                 defer cancel()
676
677                 err := client.RequestAndDecodeContext(ctx, &cr, "POST", "arvados/v1/container_requests", nil, map[string]interface{}{"container_request": map[string]interface{}{
678                         "owner_uuid":      project.UUID,
679                         "name":            fmt.Sprintf("diagnostics container request %s", timestamp),
680                         "container_image": diag.dockerImage,
681                         "command":         ctrCommand,
682                         "use_existing":    false,
683                         "output_path":     "/mnt/output",
684                         "output_name":     fmt.Sprintf("diagnostics output %s", timestamp),
685                         "priority":        diag.priority,
686                         "state":           arvados.ContainerRequestStateCommitted,
687                         "mounts": map[string]map[string]interface{}{
688                                 "/mnt/output": {
689                                         "kind":     "collection",
690                                         "writable": true,
691                                 },
692                         },
693                         "runtime_constraints": arvados.RuntimeConstraints{
694                                 VCPUs:        1,
695                                 RAM:          1 << 26,
696                                 KeepCacheRAM: 1 << 26,
697                         },
698                 }})
699                 if err != nil {
700                         return err
701                 }
702                 diag.verbosef("container request uuid = %s", cr.UUID)
703                 diag.verbosef("container uuid = %s", cr.ContainerUUID)
704
705                 timeout := 10 * time.Minute
706                 diag.infof("container request submitted, waiting up to %v for container to run", arvados.Duration(timeout))
707                 ctx, cancel = context.WithDeadline(context.Background(), time.Now().Add(timeout))
708                 defer cancel()
709
710                 var c arvados.Container
711                 for ; cr.State != arvados.ContainerRequestStateFinal; time.Sleep(2 * time.Second) {
712                         ctx, cancel := context.WithDeadline(ctx, time.Now().Add(diag.timeout))
713                         defer cancel()
714
715                         crStateWas := cr.State
716                         err := client.RequestAndDecodeContext(ctx, &cr, "GET", "arvados/v1/container_requests/"+cr.UUID, nil, nil)
717                         if err != nil {
718                                 return err
719                         }
720                         if cr.State != crStateWas {
721                                 diag.debugf("container request state = %s", cr.State)
722                         }
723
724                         cStateWas := c.State
725                         err = client.RequestAndDecodeContext(ctx, &c, "GET", "arvados/v1/containers/"+cr.ContainerUUID, nil, nil)
726                         if err != nil {
727                                 return err
728                         }
729                         if c.State != cStateWas {
730                                 diag.debugf("container state = %s", c.State)
731                         }
732                 }
733
734                 if c.State != arvados.ContainerStateComplete {
735                         return fmt.Errorf("container request %s is final but container %s did not complete: container state = %q", cr.UUID, cr.ContainerUUID, c.State)
736                 } else if c.ExitCode != 0 {
737                         return fmt.Errorf("container exited %d", c.ExitCode)
738                 }
739                 return nil
740         })
741 }