20482: For some reason, symlinks don't seem to be ignored for copyright checks.
[arvados.git] / lib / diagnostics / cmd.go
1 // Copyright (C) The Arvados Authors. All rights reserved.
2 //
3 // SPDX-License-Identifier: AGPL-3.0
4
5 package diagnostics
6
7 import (
8         "archive/tar"
9         "bytes"
10         "context"
11         _ "embed"
12         "flag"
13         "fmt"
14         "io"
15         "io/ioutil"
16         "net"
17         "net/http"
18         "net/url"
19         "os"
20         "strings"
21         "time"
22
23         "git.arvados.org/arvados.git/lib/cmd"
24         "git.arvados.org/arvados.git/lib/config"
25         "git.arvados.org/arvados.git/sdk/go/arvados"
26         "git.arvados.org/arvados.git/sdk/go/ctxlog"
27         "git.arvados.org/arvados.git/sdk/go/health"
28         "github.com/sirupsen/logrus"
29 )
30
31 type Command struct{}
32
33 func (Command) RunCommand(prog string, args []string, stdin io.Reader, stdout, stderr io.Writer) int {
34         var diag diagnoser
35         f := flag.NewFlagSet(prog, flag.ContinueOnError)
36         f.StringVar(&diag.projectName, "project-name", "scratch area for diagnostics", "name of project to find/create in home project and use for temporary/test objects")
37         f.StringVar(&diag.logLevel, "log-level", "info", "logging level (debug, info, warning, error)")
38         f.StringVar(&diag.dockerImage, "docker-image", "", "image to use when running a test container (default: use embedded hello-world image)")
39         f.BoolVar(&diag.checkInternal, "internal-client", false, "check that this host is considered an \"internal\" client")
40         f.BoolVar(&diag.checkExternal, "external-client", false, "check that this host is considered an \"external\" client")
41         f.BoolVar(&diag.verbose, "v", false, "verbose: include more information in report")
42         f.IntVar(&diag.priority, "priority", 500, "priority for test container (1..1000, or 0 to skip)")
43         f.DurationVar(&diag.timeout, "timeout", 10*time.Second, "timeout for http requests")
44         if ok, code := cmd.ParseFlags(f, prog, args, "", stderr); !ok {
45                 return code
46         }
47         diag.logger = ctxlog.New(stdout, "text", diag.logLevel)
48         diag.logger.SetFormatter(&logrus.TextFormatter{DisableTimestamp: true, DisableLevelTruncation: true, PadLevelText: true})
49         diag.runtests()
50         if len(diag.errors) == 0 {
51                 diag.logger.Info("--- no errors ---")
52                 return 0
53         } else {
54                 if diag.logger.Level > logrus.ErrorLevel {
55                         fmt.Fprint(stdout, "\n--- cut here --- error summary ---\n\n")
56                         for _, e := range diag.errors {
57                                 diag.logger.Error(e)
58                         }
59                 }
60                 return 1
61         }
62 }
63
64 // docker save hello-world > hello-world.tar
65 //
66 //go:embed hello-world.tar
67 var HelloWorldDockerImage []byte
68
69 type diagnoser struct {
70         stdout        io.Writer
71         stderr        io.Writer
72         logLevel      string
73         priority      int
74         projectName   string
75         dockerImage   string
76         checkInternal bool
77         checkExternal bool
78         verbose       bool
79         timeout       time.Duration
80         logger        *logrus.Logger
81         errors        []string
82         done          map[int]bool
83 }
84
85 func (diag *diagnoser) debugf(f string, args ...interface{}) {
86         diag.logger.Debugf("  ... "+f, args...)
87 }
88
89 func (diag *diagnoser) infof(f string, args ...interface{}) {
90         diag.logger.Infof("  ... "+f, args...)
91 }
92
93 func (diag *diagnoser) verbosef(f string, args ...interface{}) {
94         if diag.verbose {
95                 diag.logger.Infof("  ... "+f, args...)
96         }
97 }
98
99 func (diag *diagnoser) warnf(f string, args ...interface{}) {
100         diag.logger.Warnf("  ... "+f, args...)
101 }
102
103 func (diag *diagnoser) errorf(f string, args ...interface{}) {
104         diag.logger.Errorf(f, args...)
105         diag.errors = append(diag.errors, fmt.Sprintf(f, args...))
106 }
107
108 // Run the given func, logging appropriate messages before and after,
109 // adding timing info, etc.
110 //
111 // The id argument should be unique among tests, and shouldn't change
112 // when other tests are added/removed.
113 func (diag *diagnoser) dotest(id int, title string, fn func() error) {
114         if diag.done == nil {
115                 diag.done = map[int]bool{}
116         } else if diag.done[id] {
117                 diag.errorf("(bug) reused test id %d", id)
118         }
119         diag.done[id] = true
120
121         diag.logger.Infof("%4d: %s", id, title)
122         t0 := time.Now()
123         err := fn()
124         elapsed := fmt.Sprintf("%d ms", time.Now().Sub(t0)/time.Millisecond)
125         if err != nil {
126                 diag.errorf("%4d: %s (%s): %s", id, title, elapsed, err)
127         } else {
128                 diag.logger.Debugf("%4d: %s (%s): ok", id, title, elapsed)
129         }
130 }
131
132 func (diag *diagnoser) runtests() {
133         client := arvados.NewClientFromEnv()
134         // Disable auto-retry, use context instead
135         client.Timeout = 0
136
137         if client.APIHost == "" || client.AuthToken == "" {
138                 diag.errorf("ARVADOS_API_HOST and ARVADOS_API_TOKEN environment variables are not set -- aborting without running any tests")
139                 return
140         }
141
142         hostname, err := os.Hostname()
143         if err != nil {
144                 diag.warnf("error getting hostname: %s")
145         } else {
146                 diag.verbosef("hostname = %s", hostname)
147         }
148
149         diag.dotest(5, "running health check (same as `arvados-server check`)", func() error {
150                 ldr := config.NewLoader(&bytes.Buffer{}, ctxlog.New(&bytes.Buffer{}, "text", "info"))
151                 ldr.SetupFlags(flag.NewFlagSet("diagnostics", flag.ContinueOnError))
152                 cfg, err := ldr.Load()
153                 if err != nil {
154                         diag.infof("skipping because config could not be loaded: %s", err)
155                         return nil
156                 }
157                 cluster, err := cfg.GetCluster("")
158                 if err != nil {
159                         return err
160                 }
161                 if cluster.SystemRootToken != os.Getenv("ARVADOS_API_TOKEN") {
162                         return fmt.Errorf("diagnostics usage error: %s is readable but SystemRootToken does not match $ARVADOS_API_TOKEN (to fix, either run 'arvados-client sudo diagnostics' to load everything from config file, or set ARVADOS_CONFIG=- to load nothing from config file)", ldr.Path)
163                 }
164                 agg := &health.Aggregator{Cluster: cluster}
165                 resp := agg.ClusterHealth()
166                 for _, e := range resp.Errors {
167                         diag.errorf("health check: %s", e)
168                 }
169                 if len(resp.Errors) > 0 {
170                         diag.infof("consider running `arvados-server check -yaml` for a comprehensive report")
171                 }
172                 diag.verbosef("reported clock skew = %v", resp.ClockSkew)
173                 reported := map[string]bool{}
174                 for _, result := range resp.Checks {
175                         version := strings.SplitN(result.Metrics.Version, " (go", 2)[0]
176                         if version != "" && !reported[version] {
177                                 diag.verbosef("arvados version = %s", version)
178                                 reported[version] = true
179                         }
180                 }
181                 reported = map[string]bool{}
182                 for _, result := range resp.Checks {
183                         if result.Server != "" && !reported[result.Server] {
184                                 diag.verbosef("http frontend version = %s", result.Server)
185                                 reported[result.Server] = true
186                         }
187                 }
188                 reported = map[string]bool{}
189                 for _, result := range resp.Checks {
190                         if sha := result.ConfigSourceSHA256; sha != "" && !reported[sha] {
191                                 diag.verbosef("config file sha256 = %s", sha)
192                                 reported[sha] = true
193                         }
194                 }
195                 return nil
196         })
197
198         var dd arvados.DiscoveryDocument
199         ddpath := "discovery/v1/apis/arvados/v1/rest"
200         diag.dotest(10, fmt.Sprintf("getting discovery document from https://%s/%s", client.APIHost, ddpath), func() error {
201                 ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
202                 defer cancel()
203                 err := client.RequestAndDecodeContext(ctx, &dd, "GET", ddpath, nil, nil)
204                 if err != nil {
205                         return err
206                 }
207                 diag.verbosef("BlobSignatureTTL = %d", dd.BlobSignatureTTL)
208                 return nil
209         })
210
211         var cluster arvados.Cluster
212         cfgpath := "arvados/v1/config"
213         cfgOK := false
214         diag.dotest(20, fmt.Sprintf("getting exported config from https://%s/%s", client.APIHost, cfgpath), func() error {
215                 ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
216                 defer cancel()
217                 err := client.RequestAndDecodeContext(ctx, &cluster, "GET", cfgpath, nil, nil)
218                 if err != nil {
219                         return err
220                 }
221                 diag.verbosef("Collections.BlobSigning = %v", cluster.Collections.BlobSigning)
222                 cfgOK = true
223                 return nil
224         })
225
226         var user arvados.User
227         diag.dotest(30, "getting current user record", func() error {
228                 ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
229                 defer cancel()
230                 err := client.RequestAndDecodeContext(ctx, &user, "GET", "arvados/v1/users/current", nil, nil)
231                 if err != nil {
232                         return err
233                 }
234                 diag.verbosef("user uuid = %s", user.UUID)
235                 return nil
236         })
237
238         if !cfgOK {
239                 diag.errorf("cannot proceed without cluster config -- aborting without running any further tests")
240                 return
241         }
242
243         // uncomment to create some spurious errors
244         // cluster.Services.WebDAVDownload.ExternalURL.Host = "0.0.0.0:9"
245
246         // TODO: detect routing errors here, like finding wb2 at the
247         // wb1 address.
248         for i, svc := range []*arvados.Service{
249                 &cluster.Services.Keepproxy,
250                 &cluster.Services.WebDAV,
251                 &cluster.Services.WebDAVDownload,
252                 &cluster.Services.Websocket,
253                 &cluster.Services.Workbench1,
254                 &cluster.Services.Workbench2,
255         } {
256                 diag.dotest(40+i, fmt.Sprintf("connecting to service endpoint %s", svc.ExternalURL), func() error {
257                         ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
258                         defer cancel()
259                         u := svc.ExternalURL
260                         if strings.HasPrefix(u.Scheme, "ws") {
261                                 // We can do a real websocket test elsewhere,
262                                 // but for now we'll just check the https
263                                 // connection.
264                                 u.Scheme = "http" + u.Scheme[2:]
265                         }
266                         if svc == &cluster.Services.WebDAV && strings.HasPrefix(u.Host, "*") {
267                                 u.Host = "d41d8cd98f00b204e9800998ecf8427e-0" + u.Host[1:]
268                         }
269                         req, err := http.NewRequestWithContext(ctx, http.MethodGet, u.String(), nil)
270                         if err != nil {
271                                 return err
272                         }
273                         resp, err := http.DefaultClient.Do(req)
274                         if err != nil {
275                                 return err
276                         }
277                         resp.Body.Close()
278                         return nil
279                 })
280         }
281
282         for i, url := range []string{
283                 cluster.Services.Controller.ExternalURL.String(),
284                 cluster.Services.Keepproxy.ExternalURL.String() + "d41d8cd98f00b204e9800998ecf8427e+0",
285                 cluster.Services.WebDAVDownload.ExternalURL.String(),
286         } {
287                 diag.dotest(50+i, fmt.Sprintf("checking CORS headers at %s", url), func() error {
288                         ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
289                         defer cancel()
290                         req, err := http.NewRequestWithContext(ctx, "GET", url, nil)
291                         if err != nil {
292                                 return err
293                         }
294                         req.Header.Set("Origin", "https://example.com")
295                         resp, err := http.DefaultClient.Do(req)
296                         if err != nil {
297                                 return err
298                         }
299                         if hdr := resp.Header.Get("Access-Control-Allow-Origin"); hdr != "*" {
300                                 return fmt.Errorf("expected \"Access-Control-Allow-Origin: *\", got %q", hdr)
301                         }
302                         return nil
303                 })
304         }
305
306         var keeplist arvados.KeepServiceList
307         diag.dotest(60, "checking internal/external client detection", func() error {
308                 ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
309                 defer cancel()
310                 err := client.RequestAndDecodeContext(ctx, &keeplist, "GET", "arvados/v1/keep_services/accessible", nil, arvados.ListOptions{Limit: 999999})
311                 if err != nil {
312                         return fmt.Errorf("error getting keep services list: %s", err)
313                 } else if len(keeplist.Items) == 0 {
314                         return fmt.Errorf("controller did not return any keep services")
315                 }
316                 found := map[string]int{}
317                 for _, ks := range keeplist.Items {
318                         found[ks.ServiceType]++
319                 }
320                 isInternal := found["proxy"] == 0 && len(keeplist.Items) > 0
321                 isExternal := found["proxy"] > 0 && found["proxy"] == len(keeplist.Items)
322                 if isExternal {
323                         diag.infof("controller returned only proxy services, this host is treated as \"external\"")
324                 } else if isInternal {
325                         diag.infof("controller returned only non-proxy services, this host is treated as \"internal\"")
326                 }
327                 if (diag.checkInternal && !isInternal) || (diag.checkExternal && !isExternal) {
328                         return fmt.Errorf("expecting internal=%v external=%v, but found internal=%v external=%v", diag.checkInternal, diag.checkExternal, isInternal, isExternal)
329                 }
330                 return nil
331         })
332
333         for i, ks := range keeplist.Items {
334                 u := url.URL{
335                         Scheme: "http",
336                         Host:   net.JoinHostPort(ks.ServiceHost, fmt.Sprintf("%d", ks.ServicePort)),
337                         Path:   "/",
338                 }
339                 if ks.ServiceSSLFlag {
340                         u.Scheme = "https"
341                 }
342                 diag.dotest(61+i, fmt.Sprintf("reading+writing via keep service at %s", u.String()), func() error {
343                         ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
344                         defer cancel()
345                         req, err := http.NewRequestWithContext(ctx, "PUT", u.String()+"d41d8cd98f00b204e9800998ecf8427e", nil)
346                         if err != nil {
347                                 return err
348                         }
349                         req.Header.Set("Authorization", "Bearer "+client.AuthToken)
350                         resp, err := http.DefaultClient.Do(req)
351                         if err != nil {
352                                 return err
353                         }
354                         defer resp.Body.Close()
355                         body, err := ioutil.ReadAll(resp.Body)
356                         if err != nil {
357                                 return fmt.Errorf("reading response body: %s", err)
358                         }
359                         loc := strings.TrimSpace(string(body))
360                         if !strings.HasPrefix(loc, "d41d8") {
361                                 return fmt.Errorf("unexpected response from write: %q", body)
362                         }
363
364                         req, err = http.NewRequestWithContext(ctx, "GET", u.String()+loc, nil)
365                         if err != nil {
366                                 return err
367                         }
368                         req.Header.Set("Authorization", "Bearer "+client.AuthToken)
369                         resp, err = http.DefaultClient.Do(req)
370                         if err != nil {
371                                 return err
372                         }
373                         defer resp.Body.Close()
374                         body, err = ioutil.ReadAll(resp.Body)
375                         if err != nil {
376                                 return fmt.Errorf("reading response body: %s", err)
377                         }
378                         if len(body) != 0 {
379                                 return fmt.Errorf("unexpected response from read: %q", body)
380                         }
381
382                         return nil
383                 })
384         }
385
386         var project arvados.Group
387         diag.dotest(80, fmt.Sprintf("finding/creating %q project", diag.projectName), func() error {
388                 ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
389                 defer cancel()
390                 var grplist arvados.GroupList
391                 err := client.RequestAndDecodeContext(ctx, &grplist, "GET", "arvados/v1/groups", nil, arvados.ListOptions{
392                         Filters: []arvados.Filter{
393                                 {"name", "=", diag.projectName},
394                                 {"group_class", "=", "project"},
395                                 {"owner_uuid", "=", user.UUID}},
396                         Limit: 999999})
397                 if err != nil {
398                         return fmt.Errorf("list groups: %s", err)
399                 }
400                 if len(grplist.Items) > 0 {
401                         project = grplist.Items[0]
402                         diag.verbosef("using existing project, uuid = %s", project.UUID)
403                         return nil
404                 }
405                 diag.debugf("list groups: ok, no results")
406                 err = client.RequestAndDecodeContext(ctx, &project, "POST", "arvados/v1/groups", nil, map[string]interface{}{"group": map[string]interface{}{
407                         "name":        diag.projectName,
408                         "group_class": "project",
409                 }})
410                 if err != nil {
411                         return fmt.Errorf("create project: %s", err)
412                 }
413                 diag.verbosef("created project, uuid = %s", project.UUID)
414                 return nil
415         })
416
417         var collection arvados.Collection
418         diag.dotest(90, "creating temporary collection", func() error {
419                 if project.UUID == "" {
420                         return fmt.Errorf("skipping, no project to work in")
421                 }
422                 ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
423                 defer cancel()
424                 err := client.RequestAndDecodeContext(ctx, &collection, "POST", "arvados/v1/collections", nil, map[string]interface{}{
425                         "ensure_unique_name": true,
426                         "collection": map[string]interface{}{
427                                 "owner_uuid": project.UUID,
428                                 "name":       "test collection",
429                                 "trash_at":   time.Now().Add(time.Hour)}})
430                 if err != nil {
431                         return err
432                 }
433                 diag.verbosef("ok, uuid = %s", collection.UUID)
434                 return nil
435         })
436
437         if collection.UUID != "" {
438                 defer func() {
439                         diag.dotest(9990, "deleting temporary collection", func() error {
440                                 ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
441                                 defer cancel()
442                                 return client.RequestAndDecodeContext(ctx, nil, "DELETE", "arvados/v1/collections/"+collection.UUID, nil, nil)
443                         })
444                 }()
445         }
446
447         // Read hello-world.tar to find image ID, so we can upload it
448         // as "sha256:{...}.tar"
449         var imageSHA2 string
450         {
451                 tr := tar.NewReader(bytes.NewReader(HelloWorldDockerImage))
452                 for {
453                         hdr, err := tr.Next()
454                         if err == io.EOF {
455                                 break
456                         }
457                         if err != nil {
458                                 diag.errorf("internal error/bug: cannot read embedded docker image tar file: %s", err)
459                                 return
460                         }
461                         if s := strings.TrimSuffix(hdr.Name, ".json"); len(s) == 64 && s != hdr.Name {
462                                 imageSHA2 = s
463                         }
464                 }
465                 if imageSHA2 == "" {
466                         diag.errorf("internal error/bug: cannot find {sha256}.json file in embedded docker image tar file")
467                         return
468                 }
469         }
470         tarfilename := "sha256:" + imageSHA2 + ".tar"
471
472         diag.dotest(100, "uploading file via webdav", func() error {
473                 ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
474                 defer cancel()
475                 if collection.UUID == "" {
476                         return fmt.Errorf("skipping, no test collection")
477                 }
478                 req, err := http.NewRequestWithContext(ctx, "PUT", cluster.Services.WebDAVDownload.ExternalURL.String()+"c="+collection.UUID+"/"+tarfilename, bytes.NewReader(HelloWorldDockerImage))
479                 if err != nil {
480                         return fmt.Errorf("BUG? http.NewRequest: %s", err)
481                 }
482                 req.Header.Set("Authorization", "Bearer "+client.AuthToken)
483                 resp, err := http.DefaultClient.Do(req)
484                 if err != nil {
485                         return fmt.Errorf("error performing http request: %s", err)
486                 }
487                 resp.Body.Close()
488                 if resp.StatusCode != http.StatusCreated {
489                         return fmt.Errorf("status %s", resp.Status)
490                 }
491                 diag.debugf("ok, status %s", resp.Status)
492                 err = client.RequestAndDecodeContext(ctx, &collection, "GET", "arvados/v1/collections/"+collection.UUID, nil, nil)
493                 if err != nil {
494                         return fmt.Errorf("get updated collection: %s", err)
495                 }
496                 diag.debugf("ok, pdh %s", collection.PortableDataHash)
497                 return nil
498         })
499
500         davurl := cluster.Services.WebDAV.ExternalURL
501         davWildcard := strings.HasPrefix(davurl.Host, "*--") || strings.HasPrefix(davurl.Host, "*.")
502         diag.dotest(110, fmt.Sprintf("checking WebDAV ExternalURL wildcard (%s)", davurl), func() error {
503                 if davurl.Host == "" {
504                         return fmt.Errorf("host missing - content previews will not work")
505                 }
506                 if !davWildcard && !cluster.Collections.TrustAllContent {
507                         diag.warnf("WebDAV ExternalURL has no leading wildcard and TrustAllContent==false - content previews will not work")
508                 }
509                 return nil
510         })
511
512         for i, trial := range []struct {
513                 needcoll     bool
514                 needWildcard bool
515                 status       int
516                 fileurl      string
517         }{
518                 {false, false, http.StatusNotFound, strings.Replace(davurl.String(), "*", "d41d8cd98f00b204e9800998ecf8427e-0", 1) + "foo"},
519                 {false, false, http.StatusNotFound, strings.Replace(davurl.String(), "*", "d41d8cd98f00b204e9800998ecf8427e-0", 1) + tarfilename},
520                 {false, false, http.StatusNotFound, cluster.Services.WebDAVDownload.ExternalURL.String() + "c=d41d8cd98f00b204e9800998ecf8427e+0/_/foo"},
521                 {false, false, http.StatusNotFound, cluster.Services.WebDAVDownload.ExternalURL.String() + "c=d41d8cd98f00b204e9800998ecf8427e+0/_/" + tarfilename},
522                 {true, true, http.StatusOK, strings.Replace(davurl.String(), "*", strings.Replace(collection.PortableDataHash, "+", "-", -1), 1) + tarfilename},
523                 {true, false, http.StatusOK, cluster.Services.WebDAVDownload.ExternalURL.String() + "c=" + collection.UUID + "/_/" + tarfilename},
524         } {
525                 diag.dotest(120+i, fmt.Sprintf("downloading from webdav (%s)", trial.fileurl), func() error {
526                         if trial.needWildcard && !davWildcard {
527                                 diag.warnf("skipping collection-id-in-vhost test because WebDAV ExternalURL has no leading wildcard")
528                                 return nil
529                         }
530                         ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
531                         defer cancel()
532                         if trial.needcoll && collection.UUID == "" {
533                                 return fmt.Errorf("skipping, no test collection")
534                         }
535                         req, err := http.NewRequestWithContext(ctx, "GET", trial.fileurl, nil)
536                         if err != nil {
537                                 return err
538                         }
539                         req.Header.Set("Authorization", "Bearer "+client.AuthToken)
540                         resp, err := http.DefaultClient.Do(req)
541                         if err != nil {
542                                 return err
543                         }
544                         defer resp.Body.Close()
545                         body, err := ioutil.ReadAll(resp.Body)
546                         if err != nil {
547                                 return fmt.Errorf("reading response: %s", err)
548                         }
549                         if resp.StatusCode != trial.status {
550                                 return fmt.Errorf("unexpected response status: %s", resp.Status)
551                         }
552                         if trial.status == http.StatusOK && !bytes.Equal(body, HelloWorldDockerImage) {
553                                 excerpt := body
554                                 if len(excerpt) > 128 {
555                                         excerpt = append([]byte(nil), body[:128]...)
556                                         excerpt = append(excerpt, []byte("[...]")...)
557                                 }
558                                 return fmt.Errorf("unexpected response content: len %d, %q", len(body), excerpt)
559                         }
560                         return nil
561                 })
562         }
563
564         var vm arvados.VirtualMachine
565         diag.dotest(130, "getting list of virtual machines", func() error {
566                 ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
567                 defer cancel()
568                 var vmlist arvados.VirtualMachineList
569                 err := client.RequestAndDecodeContext(ctx, &vmlist, "GET", "arvados/v1/virtual_machines", nil, arvados.ListOptions{Limit: 999999})
570                 if err != nil {
571                         return err
572                 }
573                 if len(vmlist.Items) < 1 {
574                         diag.warnf("no VMs found")
575                 } else {
576                         vm = vmlist.Items[0]
577                 }
578                 return nil
579         })
580
581         diag.dotest(140, "getting workbench1 webshell page", func() error {
582                 ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
583                 defer cancel()
584                 if vm.UUID == "" {
585                         diag.warnf("skipping, no vm available")
586                         return nil
587                 }
588                 webshelltermurl := cluster.Services.Workbench1.ExternalURL.String() + "virtual_machines/" + vm.UUID + "/webshell/testusername"
589                 diag.debugf("url %s", webshelltermurl)
590                 req, err := http.NewRequestWithContext(ctx, "GET", webshelltermurl, nil)
591                 if err != nil {
592                         return err
593                 }
594                 req.Header.Set("Authorization", "Bearer "+client.AuthToken)
595                 resp, err := http.DefaultClient.Do(req)
596                 if err != nil {
597                         return err
598                 }
599                 defer resp.Body.Close()
600                 body, err := ioutil.ReadAll(resp.Body)
601                 if err != nil {
602                         return fmt.Errorf("reading response: %s", err)
603                 }
604                 if resp.StatusCode != http.StatusOK {
605                         return fmt.Errorf("unexpected response status: %s %q", resp.Status, body)
606                 }
607                 return nil
608         })
609
610         diag.dotest(150, "connecting to webshell service", func() error {
611                 ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
612                 defer cancel()
613                 if vm.UUID == "" {
614                         diag.warnf("skipping, no vm available")
615                         return nil
616                 }
617                 u := cluster.Services.WebShell.ExternalURL
618                 webshellurl := u.String() + vm.Hostname + "?"
619                 if strings.HasPrefix(u.Host, "*") {
620                         u.Host = vm.Hostname + u.Host[1:]
621                         webshellurl = u.String() + "?"
622                 }
623                 diag.debugf("url %s", webshellurl)
624                 req, err := http.NewRequestWithContext(ctx, "POST", webshellurl, bytes.NewBufferString(url.Values{
625                         "width":   {"80"},
626                         "height":  {"25"},
627                         "session": {"xyzzy"},
628                         "rooturl": {webshellurl},
629                 }.Encode()))
630                 if err != nil {
631                         return err
632                 }
633                 req.Header.Set("Content-Type", "application/x-www-form-urlencoded; charset=UTF-8")
634                 resp, err := http.DefaultClient.Do(req)
635                 if err != nil {
636                         return err
637                 }
638                 defer resp.Body.Close()
639                 diag.debugf("response status %s", resp.Status)
640                 body, err := ioutil.ReadAll(resp.Body)
641                 if err != nil {
642                         return fmt.Errorf("reading response: %s", err)
643                 }
644                 diag.debugf("response body %q", body)
645                 // We don't speak the protocol, so we get a 400 error
646                 // from the webshell server even if everything is
647                 // OK. Anything else (404, 502, ???) indicates a
648                 // problem.
649                 if resp.StatusCode != http.StatusBadRequest {
650                         return fmt.Errorf("unexpected response status: %s, %q", resp.Status, body)
651                 }
652                 return nil
653         })
654
655         diag.dotest(160, "running a container", func() error {
656                 if diag.priority < 1 {
657                         diag.infof("skipping (use priority > 0 if you want to run a container)")
658                         return nil
659                 }
660                 if project.UUID == "" {
661                         return fmt.Errorf("skipping, no project to work in")
662                 }
663
664                 timestamp := time.Now().Format(time.RFC3339)
665                 ctrCommand := []string{"echo", timestamp}
666                 if diag.dockerImage == "" {
667                         if collection.UUID == "" {
668                                 return fmt.Errorf("skipping, no test collection to use as docker image")
669                         }
670                         diag.dockerImage = collection.PortableDataHash
671                         ctrCommand = []string{"/hello"}
672                 }
673
674                 var cr arvados.ContainerRequest
675                 ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
676                 defer cancel()
677
678                 err := client.RequestAndDecodeContext(ctx, &cr, "POST", "arvados/v1/container_requests", nil, map[string]interface{}{"container_request": map[string]interface{}{
679                         "owner_uuid":      project.UUID,
680                         "name":            fmt.Sprintf("diagnostics container request %s", timestamp),
681                         "container_image": diag.dockerImage,
682                         "command":         ctrCommand,
683                         "use_existing":    false,
684                         "output_path":     "/mnt/output",
685                         "output_name":     fmt.Sprintf("diagnostics output %s", timestamp),
686                         "priority":        diag.priority,
687                         "state":           arvados.ContainerRequestStateCommitted,
688                         "mounts": map[string]map[string]interface{}{
689                                 "/mnt/output": {
690                                         "kind":     "collection",
691                                         "writable": true,
692                                 },
693                         },
694                         "runtime_constraints": arvados.RuntimeConstraints{
695                                 VCPUs:        1,
696                                 RAM:          1 << 26,
697                                 KeepCacheRAM: 1 << 26,
698                         },
699                 }})
700                 if err != nil {
701                         return err
702                 }
703                 diag.verbosef("container request uuid = %s", cr.UUID)
704                 diag.verbosef("container uuid = %s", cr.ContainerUUID)
705
706                 timeout := 10 * time.Minute
707                 diag.infof("container request submitted, waiting up to %v for container to run", arvados.Duration(timeout))
708                 deadline := time.Now().Add(timeout)
709
710                 var c arvados.Container
711                 for ; cr.State != arvados.ContainerRequestStateFinal && time.Now().Before(deadline); time.Sleep(2 * time.Second) {
712                         ctx, cancel := context.WithDeadline(context.Background(), time.Now().Add(diag.timeout))
713                         defer cancel()
714
715                         crStateWas := cr.State
716                         err := client.RequestAndDecodeContext(ctx, &cr, "GET", "arvados/v1/container_requests/"+cr.UUID, nil, nil)
717                         if err != nil {
718                                 return err
719                         }
720                         if cr.State != crStateWas {
721                                 diag.debugf("container request state = %s", cr.State)
722                         }
723
724                         cStateWas := c.State
725                         err = client.RequestAndDecodeContext(ctx, &c, "GET", "arvados/v1/containers/"+cr.ContainerUUID, nil, nil)
726                         if err != nil {
727                                 return err
728                         }
729                         if c.State != cStateWas {
730                                 diag.debugf("container state = %s", c.State)
731                         }
732
733                         cancel()
734                 }
735
736                 if cr.State != arvados.ContainerRequestStateFinal {
737                         err := client.RequestAndDecodeContext(context.Background(), &cr, "PATCH", "arvados/v1/container_requests/"+cr.UUID, nil, map[string]interface{}{
738                                 "container_request": map[string]interface{}{
739                                         "priority": 0,
740                                 }})
741                         if err != nil {
742                                 diag.infof("error canceling container request %s: %s", cr.UUID, err)
743                         } else {
744                                 diag.debugf("canceled container request %s", cr.UUID)
745                         }
746                         return fmt.Errorf("timed out waiting for container to finish; container request %s state was %q, container %s state was %q", cr.UUID, cr.State, c.UUID, c.State)
747                 }
748                 if c.State != arvados.ContainerStateComplete {
749                         return fmt.Errorf("container request %s is final but container %s did not complete: container state = %q", cr.UUID, cr.ContainerUUID, c.State)
750                 }
751                 if c.ExitCode != 0 {
752                         return fmt.Errorf("container exited %d", c.ExitCode)
753                 }
754                 return nil
755         })
756 }