Merge branch '21165-installer-uninstalls-wb1'. Closes #21165
authorLucas Di Pentima <lucas.dipentima@curii.com>
Fri, 15 Mar 2024 18:13:11 +0000 (15:13 -0300)
committerLucas Di Pentima <lucas.dipentima@curii.com>
Fri, 15 Mar 2024 18:13:11 +0000 (15:13 -0300)
Arvados-DCO-1.1-Signed-off-by: Lucas Di Pentima <lucas.dipentima@curii.com>

24 files changed:
build/run-tests.sh
doc/api/dispatch.html.textile.liquid
doc/api/methods/container_requests.html.textile.liquid
doc/sdk/python/cookbook.html.textile.liquid
lib/config/config.default.yml
lib/controller/federation/conn.go
lib/controller/integration_test.go
lib/controller/localdb/container_request.go
lib/controller/router/router.go
lib/controller/rpc/conn.go
lib/dispatchcloud/dispatcher.go
lib/dispatchcloud/dispatcher_test.go
lib/dispatchcloud/scheduler/run_queue.go
lib/dispatchcloud/scheduler/scheduler.go
sdk/go/arvados/api.go
sdk/go/arvados/container.go
sdk/go/arvadostest/api.go
sdk/python/arvados-v1-discovery.json
sdk/python/arvados/events.py
services/api/app/controllers/arvados/v1/container_requests_controller.rb
services/api/config/routes.rb
services/workbench2/cypress/e2e/banner-tooltip.cy.js
services/workbench2/src/views-components/data-explorer/renderers.tsx
tools/arvbox/lib/arvbox/docker/service/sdk/run-service

index 28e9e1cf7be17d98704a312f8841fa946db9fd43..fa0abbc07cd3c534d37fe1908f6a6fd51c217ca4 100755 (executable)
@@ -971,11 +971,14 @@ install_services/api() {
 
 declare -a pythonstuff
 pythonstuff=(
+    # The ordering of sdk/python, tools/crunchstat-summary, and
+    # sdk/cwl here is significant. See
+    # https://dev.arvados.org/issues/19744#note-26
     sdk/python:py3
+    tools/crunchstat-summary:py3
     sdk/cwl:py3
     services/dockercleaner:py3
     services/fuse:py3
-    tools/crunchstat-summary:py3
 )
 
 declare -a gostuff
@@ -1055,9 +1058,13 @@ install_deps() {
     do_install cmd/arvados-server go
     do_install sdk/cli
     do_install sdk/python pip "${VENV3DIR}/bin/"
+    do_install tools/crunchstat-summary pip "${VENV3DIR}/bin/"
     do_install sdk/ruby-google-api-client
     do_install sdk/ruby
     do_install services/api
+    # lib/controller integration tests depend on arv-mount to run
+    # containers.
+    do_install services/fuse pip "${VENV3DIR}/bin/"
     do_install services/keepproxy go
     do_install services/keep-web go
 }
index b06136db9a8a219b8a93c27c0decbca19324b60e..cfe57640c4785cca1c5ca4df4faadbd8cea996b1 100644 (file)
@@ -32,6 +32,7 @@ Return a list of containers that are either ready to dispatch, or being started/
 Each entry in the returned list of @items@ includes:
 * an @instance_type@ entry with the name and attributes of the instance type that will be used to schedule the container (chosen from the @InstanceTypes@ section of your cluster config file); and
 * a @container@ entry with selected attributes of the container itself, including @uuid@, @priority@, @runtime_constraints@, and @state@. Other fields of the container records are not loaded by the dispatcher, and will have empty/zero values here (e.g., @{...,"created_at":"0001-01-01T00:00:00Z","command":[],...}@).
+* a @scheduling_status@ field with a brief explanation of the container's status in the dispatch queue, or an empty string if scheduling is not applicable, e.g., the container has already started running.
 
 Example response:
 
@@ -56,12 +57,31 @@ Example response:
         "AddedScratch": 0,
         "Price": 0.146,
         "Preemptible": false
-      }
+      },
+      "scheduling_status": "waiting for new instance to be ready"
     },
     ...
   ]
 }</pre></notextile>
 
+h3. Get specified container
+
+@GET /arvados/v1/dispatch/container?container_uuid={uuid}@
+
+Return the same information as "list containers" above, but for a single specified container.
+
+Example response:
+
+<notextile><pre>{
+  "container": {
+    ...
+  },
+  "instance_type": {
+    ...
+  },
+  "scheduling_status": "waiting for new instance to be ready"
+}</pre></notextile>
+
 h3. Terminate a container
 
 @POST /arvados/v1/dispatch/containers/kill?container_uuid={uuid}&reason={string}@
index c108c32808877b76e8d6af647c1dd5da63d66bbc..1c269fb3e613cf0c8d03c2ac99fbc25f20a9b7e7 100644 (file)
@@ -224,6 +224,26 @@ Setting the priority of a committed container_request to 0 may cancel a running
 See "Canceling a container request":{{site.baseurl}}/api/methods/container_requests.html#cancel_container for further details.
 {% include 'notebox_end' %}
 
+h3(#container_status). container_status
+
+Get the state and scheduling status of the container assigned to a container request.
+
+table(table table-bordered table-condensed).
+|_. Argument |_. Type |_. Description |_. Location |
+{background:#ccffcc}.|uuid|string|The UUID of the container request in question.|path|
+
+Example request: @GET /arvados/v1/container_requests/zzzzz-xvdhp-0123456789abcde/container_status@
+
+Response attributes:
+
+table(table table-bordered table-condensed).
+|_. Attribute|_. Type|_. Description|_. Examples|
+|uuid|string|The UUID of the container assigned to this request.||
+|state|string|The state of the container assigned to this request (see "container resource attributes":containers.html).||
+|scheduling_status|string|A brief explanation of the container's status in the dispatch queue, or an empty string if scheduling is not applicable, e.g., the container is running or finished.|@waiting for cloud resources: queue position 3@
+@creating new instance@
+@preparing runtime environment@|
+
 h3(#log). log
 
 Get container log data using WebDAV methods.
index c9e1f05f17567a76c362c4b7d5968a0d3352854a..d7d34fc0b0aa91221b26f38282dd85642559f73e 100644 (file)
@@ -498,7 +498,7 @@ dst_collection.copy(
     # The path of the source file or directory to copy
     'ExamplePath',
     # The path where the source file or directory will be copied.
-    # Pass the empty string like this to copy it to the same path.
+    # Pass an empty string like this to copy it to the same path.
     '',
     # The collection where the source file or directory comes from.
     # If not specified, the default is the current collection (so you'll
index e3b67f725932a8f899e95354064a3b10e6977fe0..fa74e8e6c6d52a37ee50f36efa0cf7626086a43e 100644 (file)
@@ -863,7 +863,7 @@ Clusters:
         # OpenID claim field containing the email verification
         # flag. Normally "email_verified".  To accept every returned
         # email address without checking a "verified" field at all,
-        # use the empty string "".
+        # use an empty string "".
         EmailVerifiedClaim: "email_verified"
 
         # OpenID claim field containing the user's preferred
@@ -1561,7 +1561,7 @@ Clusters:
         # container runner/supervisor. The default value is the
         # dispatcher program itself.
         #
-        # Use the empty string to disable this step: nothing will be
+        # Use an empty string to disable this step: nothing will be
         # copied, and cloud instances are assumed to have a suitable
         # version of crunch-run installed; see CrunchRunCommand above.
         DeployRunnerBinary: "/proc/self/exe"
index c5facdc7d9cf58fd2dbfa01091620a0a528e2381..949cc56dd24cc34b71a8f7ef8ea7ac1d15df6e29 100644 (file)
@@ -510,6 +510,10 @@ func (conn *Conn) ContainerRequestDelete(ctx context.Context, options arvados.De
        return conn.chooseBackend(options.UUID).ContainerRequestDelete(ctx, options)
 }
 
+func (conn *Conn) ContainerRequestContainerStatus(ctx context.Context, options arvados.GetOptions) (arvados.ContainerStatus, error) {
+       return conn.chooseBackend(options.UUID).ContainerRequestContainerStatus(ctx, options)
+}
+
 func (conn *Conn) ContainerRequestLog(ctx context.Context, options arvados.ContainerLogOptions) (http.Handler, error) {
        return conn.chooseBackend(options.UUID).ContainerRequestLog(ctx, options)
 }
index 4bf7a03447980ccd4d92637baf0ce8c1ca514a6c..53e6a90b8f2fee1d18237c157ccef0474b703227 100644 (file)
@@ -1244,10 +1244,22 @@ func (s *IntegrationSuite) runContainer(c *check.C, clusterID string, token stri
 
        var ctr arvados.Container
        var lastState arvados.ContainerState
+       var status, lastStatus arvados.ContainerStatus
+       var allStatus string
+       checkstatus := func() {
+               err := ac.RequestAndDecode(&status, "GET", "/arvados/v1/container_requests/"+cr.UUID+"/container_status", nil, nil)
+               c.Assert(err, check.IsNil)
+               if status != lastStatus {
+                       c.Logf("container status: %s, %s", status.State, status.SchedulingStatus)
+                       allStatus += fmt.Sprintf("%s, %s\n", status.State, status.SchedulingStatus)
+                       lastStatus = status
+               }
+       }
        deadline := time.Now().Add(time.Minute)
-       for cr.State != arvados.ContainerRequestStateFinal {
+       for cr.State != arvados.ContainerRequestStateFinal || (lastStatus.State != arvados.ContainerStateComplete && lastStatus.State != arvados.ContainerStateCancelled) {
                err = ac.RequestAndDecode(&cr, "GET", "/arvados/v1/container_requests/"+cr.UUID, nil, nil)
                c.Assert(err, check.IsNil)
+               checkstatus()
                err = ac.RequestAndDecode(&ctr, "GET", "/arvados/v1/containers/"+cr.ContainerUUID, nil, nil)
                if err != nil {
                        c.Logf("error getting container state: %s", err)
@@ -1267,6 +1279,7 @@ func (s *IntegrationSuite) runContainer(c *check.C, clusterID string, token stri
                        time.Sleep(time.Second / 2)
                }
        }
+       checkstatus()
        c.Logf("cr.CumulativeCost == %f", cr.CumulativeCost)
        c.Check(cr.CumulativeCost, check.Not(check.Equals), 0.0)
        if expectExitCode >= 0 {
@@ -1274,6 +1287,13 @@ func (s *IntegrationSuite) runContainer(c *check.C, clusterID string, token stri
                c.Check(ctr.ExitCode, check.Equals, expectExitCode)
                err = ac.RequestAndDecode(&outcoll, "GET", "/arvados/v1/collections/"+cr.OutputUUID, nil, nil)
                c.Assert(err, check.IsNil)
+               c.Check(allStatus, check.Matches, `Queued, waiting for dispatch\n`+
+                       `(Queued, waiting.*\n)*`+
+                       `(Locked, waiting for dispatch\n)?`+
+                       `(Locked, waiting for new instance to be ready\n)?`+
+                       `(Locked, preparing runtime environment\n)?`+
+                       `(Running, \n)?`+
+                       `Complete, \n`)
        }
        logcfs = showlogs(cr.LogUUID)
        checkwebdavlogs(cr)
index 49e21840ea206f69684738e2f9aec98f0f6c2fd3..0234ee8fa6e618fa9d095c938cb2721ae69bda90 100644 (file)
@@ -6,8 +6,15 @@ package localdb
 
 import (
        "context"
+       "encoding/json"
+       "fmt"
+       "net/http"
+       "net/url"
 
+       "git.arvados.org/arvados.git/lib/dispatchcloud/scheduler"
        "git.arvados.org/arvados.git/sdk/go/arvados"
+       "git.arvados.org/arvados.git/sdk/go/auth"
+       "git.arvados.org/arvados.git/sdk/go/httpserver"
 )
 
 // ContainerRequestCreate defers to railsProxy for everything except
@@ -54,3 +61,87 @@ func (conn *Conn) ContainerRequestDelete(ctx context.Context, opts arvados.Delet
        conn.logActivity(ctx)
        return conn.railsProxy.ContainerRequestDelete(ctx, opts)
 }
+
+// ContainerRequestContainerStatus returns the UUID, state, and
+// scheduling status of the container assigned to the container
+// request identified by opts.UUID.
+//
+// If the request has no container assigned, only SchedulingStatus
+// is set ("no container assigned"). Scheduling status is looked up
+// only while the container is Queued or Locked, by asking each
+// configured dispatchcloud service in turn and using the first
+// answer. If no dispatcher has an entry for the container and at
+// least one of them failed, the last failure is returned with
+// status 502 (Bad Gateway) so the client knows to retry.
+func (conn *Conn) ContainerRequestContainerStatus(ctx context.Context, opts arvados.GetOptions) (arvados.ContainerStatus, error) {
+       conn.logActivity(ctx)
+       var ret arvados.ContainerStatus
+       cr, err := conn.railsProxy.ContainerRequestGet(ctx, arvados.GetOptions{UUID: opts.UUID, Select: []string{"uuid", "container_uuid", "log_uuid"}})
+       if err != nil {
+               return ret, err
+       }
+       if cr.ContainerUUID == "" {
+               ret.SchedulingStatus = "no container assigned"
+               return ret, nil
+       }
+       // We use admin credentials to get the container record so we
+       // don't get an error when we're in a race with auto-retry and
+       // the container became user-unreadable since we fetched the
+       // CR above.
+       ctxRoot := auth.NewContext(ctx, &auth.Credentials{Tokens: []string{conn.cluster.SystemRootToken}})
+       ctr, err := conn.railsProxy.ContainerGet(ctxRoot, arvados.GetOptions{UUID: cr.ContainerUUID, Select: []string{"uuid", "state", "priority"}})
+       if err != nil {
+               return ret, err
+       }
+       ret.UUID = ctr.UUID
+       ret.State = ctr.State
+       if ctr.State != arvados.ContainerStateQueued && ctr.State != arvados.ContainerStateLocked {
+               // Scheduling status only applies while the container
+               // is Queued or Locked; in any other state we report
+               // an empty scheduling status.
+               return ret, nil
+       }
+       var lastErr error
+       for dispatchurl := range conn.cluster.Services.DispatchCloud.InternalURLs {
+               baseurl := url.URL(dispatchurl)
+               apiurl, err := baseurl.Parse("/arvados/v1/dispatch/container?container_uuid=" + cr.ContainerUUID)
+               if err != nil {
+                       lastErr = err
+                       continue
+               }
+               req, err := http.NewRequestWithContext(ctx, http.MethodGet, apiurl.String(), nil)
+               if err != nil {
+                       lastErr = err
+                       continue
+               }
+               req.Header.Set("Authorization", "Bearer "+conn.cluster.ManagementToken)
+               resp, err := http.DefaultClient.Do(req)
+               if err != nil {
+                       lastErr = fmt.Errorf("error getting status from dispatcher: %w", err)
+                       continue
+               }
+               // Decode only a successful response, then close the
+               // body right away on every path. A defer would keep
+               // all bodies open until the function returns because
+               // we are inside a loop, and not closing at all leaks
+               // the underlying connection.
+               var qent scheduler.QueueEnt
+               if resp.StatusCode == http.StatusOK {
+                       err = json.NewDecoder(resp.Body).Decode(&qent)
+               }
+               resp.Body.Close()
+               switch {
+               case resp.StatusCode == http.StatusNotFound:
+                       // This dispatcher has no entry for the
+                       // container. That is not an error; try the
+                       // next one.
+                       continue
+               case resp.StatusCode != http.StatusOK:
+                       lastErr = fmt.Errorf("error getting status from dispatcher: %s", resp.Status)
+                       continue
+               case err != nil:
+                       lastErr = err
+                       continue
+               }
+               ret.State = qent.Container.State // Prefer dispatcher's view of state if not equal to ctr.State
+               ret.SchedulingStatus = qent.SchedulingStatus
+               return ret, nil
+       }
+       if lastErr != nil {
+               // If we got a non-nil error from a dispatchcloud
+               // service, and the container state suggests
+               // dispatchcloud should know about it, then we return
+               // an error so the client knows to retry.
+               return ret, httpserver.ErrorWithStatus(lastErr, http.StatusBadGateway)
+       }
+       // All running dispatchcloud services confirm they don't have
+       // this container (the dispatcher hasn't yet noticed it
+       // appearing in the queue) or there are no dispatchcloud
+       // services configured. Either way, all we can say is that
+       // it's queued.
+       if ctr.State == arvados.ContainerStateQueued && ctr.Priority < 1 {
+               // If it hasn't been picked up by a dispatcher
+               // already, it won't be -- it's just on hold.
+               // Scheduling status does not apply.
+               return ret, nil
+       }
+       ret.SchedulingStatus = "waiting for dispatch"
+       return ret, nil
+}
index d39f493a956b21d66b38182addb36f9462d57736..054bcffaf7ecf33b12965bb8e0d0be2d9590e1e0 100644 (file)
@@ -318,6 +318,13 @@ func (rtr *router) addRoutes() {
                                return rtr.backend.ContainerRequestDelete(ctx, *opts.(*arvados.DeleteOptions))
                        },
                },
+               {
+                       arvados.EndpointContainerRequestContainerStatus,
+                       func() interface{} { return &arvados.GetOptions{} },
+                       func(ctx context.Context, opts interface{}) (interface{}, error) {
+                               return rtr.backend.ContainerRequestContainerStatus(ctx, *opts.(*arvados.GetOptions))
+                       },
+               },
                {
                        arvados.EndpointContainerRequestLog,
                        func() interface{} { return &arvados.ContainerLogOptions{} },
index 9f518d9c7a91027a0ddc6552a341455b14b5f256..c6be679a256cb2e860d5ce179646e3378219d1c6 100644 (file)
@@ -529,6 +529,13 @@ func (conn *Conn) ContainerRequestDelete(ctx context.Context, options arvados.De
        return resp, err
 }
 
+// ContainerRequestContainerStatus sends the request described by
+// options to the EndpointContainerRequestContainerStatus endpoint
+// and returns the decoded response along with any error from
+// requestAndDecode.
+func (conn *Conn) ContainerRequestContainerStatus(ctx context.Context, options arvados.GetOptions) (arvados.ContainerStatus, error) {
+       var status arvados.ContainerStatus
+       err := conn.requestAndDecode(ctx, &status, arvados.EndpointContainerRequestContainerStatus, nil, options)
+       return status, err
+}
+
 func (conn *Conn) ContainerRequestLog(ctx context.Context, options arvados.ContainerLogOptions) (resp http.Handler, err error) {
        proxy := &httputil.ReverseProxy{
                Transport: conn.httpClient.Transport,
index 47e60abdee4744689f18ecd2094fd606188982d3..04283df48f6faf60bd0968b327a6e953c41d6d18 100644 (file)
@@ -61,14 +61,22 @@ type dispatcher struct {
        instanceSet cloud.InstanceSet
        pool        pool
        queue       scheduler.ContainerQueue
+       sched       *scheduler.Scheduler
        httpHandler http.Handler
        sshKey      ssh.Signer
 
        setupOnce sync.Once
        stop      chan struct{}
        stopped   chan struct{}
+
+       schedQueueMtx       sync.Mutex
+       schedQueueRefreshed time.Time
+       schedQueue          []scheduler.QueueEnt
+       schedQueueMap       map[string]scheduler.QueueEnt
 }
 
+var schedQueueRefresh = time.Second
+
 // Start starts the dispatcher. Start can be called multiple times
 // with no ill effect.
 func (disp *dispatcher) Start() {
@@ -155,7 +163,22 @@ func (disp *dispatcher) initialize() {
        dblock.Dispatch.Lock(disp.Context, disp.dbConnector.GetDB)
        disp.instanceSet = instanceSet
        disp.pool = worker.NewPool(disp.logger, disp.ArvClient, disp.Registry, disp.InstanceSetID, disp.instanceSet, disp.newExecutor, installPublicKey, disp.Cluster)
-       disp.queue = container.NewQueue(disp.logger, disp.Registry, disp.typeChooser, disp.ArvClient)
+       if disp.queue == nil {
+               disp.queue = container.NewQueue(disp.logger, disp.Registry, disp.typeChooser, disp.ArvClient)
+       }
+
+       staleLockTimeout := time.Duration(disp.Cluster.Containers.StaleLockTimeout)
+       if staleLockTimeout == 0 {
+               staleLockTimeout = defaultStaleLockTimeout
+       }
+       pollInterval := time.Duration(disp.Cluster.Containers.CloudVMs.PollInterval)
+       if pollInterval <= 0 {
+               pollInterval = defaultPollInterval
+       }
+       disp.sched = scheduler.New(disp.Context, disp.ArvClient, disp.queue, disp.pool, disp.Registry, staleLockTimeout, pollInterval,
+               disp.Cluster.Containers.CloudVMs.InitialQuotaEstimate,
+               disp.Cluster.Containers.CloudVMs.MaxInstances,
+               disp.Cluster.Containers.CloudVMs.SupervisorFraction)
 
        if disp.Cluster.ManagementToken == "" {
                disp.httpHandler = http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
@@ -164,6 +187,7 @@ func (disp *dispatcher) initialize() {
        } else {
                mux := httprouter.New()
                mux.HandlerFunc("GET", "/arvados/v1/dispatch/containers", disp.apiContainers)
+               mux.HandlerFunc("GET", "/arvados/v1/dispatch/container", disp.apiContainer)
                mux.HandlerFunc("POST", "/arvados/v1/dispatch/containers/kill", disp.apiContainerKill)
                mux.HandlerFunc("GET", "/arvados/v1/dispatch/instances", disp.apiInstances)
                mux.HandlerFunc("POST", "/arvados/v1/dispatch/instances/hold", disp.apiInstanceHold)
@@ -190,36 +214,53 @@ func (disp *dispatcher) run() {
        defer disp.instanceSet.Stop()
        defer disp.pool.Stop()
 
-       staleLockTimeout := time.Duration(disp.Cluster.Containers.StaleLockTimeout)
-       if staleLockTimeout == 0 {
-               staleLockTimeout = defaultStaleLockTimeout
-       }
-       pollInterval := time.Duration(disp.Cluster.Containers.CloudVMs.PollInterval)
-       if pollInterval <= 0 {
-               pollInterval = defaultPollInterval
-       }
-       sched := scheduler.New(disp.Context, disp.ArvClient, disp.queue, disp.pool, disp.Registry, staleLockTimeout, pollInterval,
-               disp.Cluster.Containers.CloudVMs.InitialQuotaEstimate,
-               disp.Cluster.Containers.CloudVMs.MaxInstances,
-               disp.Cluster.Containers.CloudVMs.SupervisorFraction)
-       sched.Start()
-       defer sched.Stop()
+       disp.sched.Start()
+       defer disp.sched.Stop()
 
        <-disp.stop
 }
 
-// Management API: all active and queued containers.
+// Get a snapshot of the scheduler's queue, no older than
+// schedQueueRefresh.
+//
+// First return value is in the sorted order used by the scheduler.
+// Second return value is a map of the same entries, for efficiently
+// looking up a single container.
+func (disp *dispatcher) schedQueueCurrent() ([]scheduler.QueueEnt, map[string]scheduler.QueueEnt) {
+       disp.schedQueueMtx.Lock()
+       defer disp.schedQueueMtx.Unlock()
+       if time.Since(disp.schedQueueRefreshed) > schedQueueRefresh {
+               disp.schedQueue = disp.sched.Queue()
+               disp.schedQueueMap = make(map[string]scheduler.QueueEnt)
+               for _, ent := range disp.schedQueue {
+                       disp.schedQueueMap[ent.Container.UUID] = ent
+               }
+               disp.schedQueueRefreshed = time.Now()
+       }
+       return disp.schedQueue, disp.schedQueueMap
+}
+
+// Management API: scheduling queue entries for all active and queued
+// containers.
 func (disp *dispatcher) apiContainers(w http.ResponseWriter, r *http.Request) {
        var resp struct {
-               Items []container.QueueEnt `json:"items"`
-       }
-       qEntries, _ := disp.queue.Entries()
-       for _, ent := range qEntries {
-               resp.Items = append(resp.Items, ent)
+               Items []scheduler.QueueEnt `json:"items"`
        }
+       resp.Items, _ = disp.schedQueueCurrent()
        json.NewEncoder(w).Encode(resp)
 }
 
+// Management API: scheduling queue entry for a specified container.
+func (disp *dispatcher) apiContainer(w http.ResponseWriter, r *http.Request) {
+       _, sq := disp.schedQueueCurrent()
+       ent, ok := sq[r.FormValue("container_uuid")]
+       if !ok {
+               httpserver.Error(w, "container not found", http.StatusNotFound)
+               return
+       }
+       json.NewEncoder(w).Encode(ent)
+}
+
 // Management API: all active instances (cloud VMs).
 func (disp *dispatcher) apiInstances(w http.ResponseWriter, r *http.Request) {
        var resp struct {
index 20185554b8b1828fc92e24b1c1f7ecbc8603b6fc..d651e73a67c341bbd8df7ac2465bcecc060aa042 100644 (file)
@@ -8,12 +8,14 @@ import (
        "context"
        "crypto/tls"
        "encoding/json"
+       "fmt"
        "io/ioutil"
        "math/rand"
        "net/http"
        "net/http/httptest"
        "net/url"
        "os"
+       "strings"
        "sync"
        "sync/atomic"
        "time"
@@ -127,6 +129,10 @@ func (s *DispatcherSuite) SetUpTest(c *check.C) {
                ArvClient: arvClient,
                AuthToken: arvadostest.AdminToken,
                Registry:  prometheus.NewRegistry(),
+               // Providing a stub queue here prevents
+               // disp.initialize() from making a real one that uses
+               // the integration test servers/database.
+               queue: &test.Queue{},
        }
        // Test cases can modify s.cluster before calling
        // initialize(), and then modify private state before calling
@@ -159,7 +165,6 @@ func (s *DispatcherSuite) arvClientProxy(c *check.C) func(*http.Request) (*url.U
 // artificial errors in order to exercise a variety of code paths.
 func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) {
        Drivers["test"] = s.stubDriver
-       s.disp.setupOnce.Do(s.disp.initialize)
        queue := &test.Queue{
                MaxDispatchAttempts: 5,
                ChooseType: func(ctr *arvados.Container) ([]arvados.InstanceType, error) {
@@ -179,6 +184,7 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) {
                })
        }
        s.disp.queue = queue
+       s.disp.setupOnce.Do(s.disp.initialize)
 
        var mtx sync.Mutex
        done := make(chan struct{})
@@ -323,11 +329,10 @@ func (s *DispatcherSuite) TestDispatchToStubDriver(c *check.C) {
        c.Check(resp.Body.String(), check.Matches, `(?ms).*max_concurrent_containers [1-9][0-9e+.]*`)
 }
 
-func (s *DispatcherSuite) TestAPIPermissions(c *check.C) {
+func (s *DispatcherSuite) TestManagementAPI_Permissions(c *check.C) {
        s.cluster.ManagementToken = "abcdefgh"
        Drivers["test"] = s.stubDriver
        s.disp.setupOnce.Do(s.disp.initialize)
-       s.disp.queue = &test.Queue{}
        go s.disp.run()
 
        for _, token := range []string{"abc", ""} {
@@ -345,11 +350,10 @@ func (s *DispatcherSuite) TestAPIPermissions(c *check.C) {
        }
 }
 
-func (s *DispatcherSuite) TestAPIDisabled(c *check.C) {
+func (s *DispatcherSuite) TestManagementAPI_Disabled(c *check.C) {
        s.cluster.ManagementToken = ""
        Drivers["test"] = s.stubDriver
        s.disp.setupOnce.Do(s.disp.initialize)
-       s.disp.queue = &test.Queue{}
        go s.disp.run()
 
        for _, token := range []string{"abc", ""} {
@@ -363,13 +367,121 @@ func (s *DispatcherSuite) TestAPIDisabled(c *check.C) {
        }
 }
 
-func (s *DispatcherSuite) TestInstancesAPI(c *check.C) {
+// TestManagementAPI_Containers queues 20 containers against a stub
+// driver limited to 4 instances, then polls the "list containers"
+// management endpoint for up to one second until the reported
+// order, state, and scheduling status of every entry match the
+// expected summary.
+func (s *DispatcherSuite) TestManagementAPI_Containers(c *check.C) {
+       s.cluster.ManagementToken = "abcdefgh"
+       s.cluster.Containers.CloudVMs.InitialQuotaEstimate = 4
+       Drivers["test"] = s.stubDriver
+       queue := &test.Queue{
+               MaxDispatchAttempts: 5,
+               ChooseType: func(ctr *arvados.Container) ([]arvados.InstanceType, error) {
+                       return ChooseInstanceType(s.cluster, ctr)
+               },
+               Logger: ctxlog.TestLogger(c),
+       }
+       s.stubDriver.Queue = queue
+       s.stubDriver.QuotaMaxInstances = 4
+       s.stubDriver.SetupVM = func(stubvm *test.StubVM) error {
+               if stubvm.Instance().ProviderType() >= test.InstanceType(4).ProviderType {
+                       return test.CapacityError{InstanceTypeSpecific: true}
+               }
+               stubvm.ExecuteContainer = func(ctr arvados.Container) int {
+                       time.Sleep(5 * time.Second)
+                       return 0
+               }
+               return nil
+       }
+       s.disp.queue = queue
+       s.disp.setupOnce.Do(s.disp.initialize)
+
+       go s.disp.run()
+
+       // Shorten the dispatcher's queue snapshot cache lifetime so
+       // each poll below sees fresh scheduler state, and restore the
+       // previous value afterwards so the override does not leak
+       // into other tests in this package.
+       defer func(orig time.Duration) { schedQueueRefresh = orig }(schedQueueRefresh)
+       schedQueueRefresh = time.Millisecond
+
+       type queueEnt struct {
+               Container        arvados.Container
+               InstanceType     arvados.InstanceType `json:"instance_type"`
+               SchedulingStatus string               `json:"scheduling_status"`
+       }
+       type containersResponse struct {
+               Items []queueEnt
+       }
+       getContainers := func() containersResponse {
+               req := httptest.NewRequest("GET", "/arvados/v1/dispatch/containers", nil)
+               req.Header.Set("Authorization", "Bearer abcdefgh")
+               resp := httptest.NewRecorder()
+               s.disp.ServeHTTP(resp, req)
+               var cresp containersResponse
+               c.Check(resp.Code, check.Equals, http.StatusOK)
+               err := json.Unmarshal(resp.Body.Bytes(), &cresp)
+               c.Check(err, check.IsNil)
+               return cresp
+       }
+
+       c.Check(getContainers().Items, check.HasLen, 0)
+
+       for i := 0; i < 20; i++ {
+               queue.Containers = append(queue.Containers, arvados.Container{
+                       UUID:     test.ContainerUUID(i),
+                       State:    arvados.ContainerStateQueued,
+                       Priority: int64(100 - i),
+                       RuntimeConstraints: arvados.RuntimeConstraints{
+                               RAM:   int64(i%3+1) << 30,
+                               VCPUs: i%8 + 1,
+                       },
+               })
+       }
+       queue.Update()
+
+       expect := `
+ 0 zzzzz-dz642-000000000000000 (Running) ""
+ 1 zzzzz-dz642-000000000000001 (Running) ""
+ 2 zzzzz-dz642-000000000000002 (Locked) "waiting for suitable instance type to become available: queue position 1"
+ 3 zzzzz-dz642-000000000000003 (Locked) "waiting for suitable instance type to become available: queue position 2"
+ 4 zzzzz-dz642-000000000000004 (Queued) "waiting while cluster is running at capacity: queue position 3"
+ 5 zzzzz-dz642-000000000000005 (Queued) "waiting while cluster is running at capacity: queue position 4"
+ 6 zzzzz-dz642-000000000000006 (Queued) "waiting while cluster is running at capacity: queue position 5"
+ 7 zzzzz-dz642-000000000000007 (Queued) "waiting while cluster is running at capacity: queue position 6"
+ 8 zzzzz-dz642-000000000000008 (Queued) "waiting while cluster is running at capacity: queue position 7"
+ 9 zzzzz-dz642-000000000000009 (Queued) "waiting while cluster is running at capacity: queue position 8"
+ 10 zzzzz-dz642-000000000000010 (Queued) "waiting while cluster is running at capacity: queue position 9"
+ 11 zzzzz-dz642-000000000000011 (Queued) "waiting while cluster is running at capacity: queue position 10"
+ 12 zzzzz-dz642-000000000000012 (Queued) "waiting while cluster is running at capacity: queue position 11"
+ 13 zzzzz-dz642-000000000000013 (Queued) "waiting while cluster is running at capacity: queue position 12"
+ 14 zzzzz-dz642-000000000000014 (Queued) "waiting while cluster is running at capacity: queue position 13"
+ 15 zzzzz-dz642-000000000000015 (Queued) "waiting while cluster is running at capacity: queue position 14"
+ 16 zzzzz-dz642-000000000000016 (Queued) "waiting while cluster is running at capacity: queue position 15"
+ 17 zzzzz-dz642-000000000000017 (Queued) "waiting while cluster is running at capacity: queue position 16"
+ 18 zzzzz-dz642-000000000000018 (Queued) "waiting while cluster is running at capacity: queue position 17"
+ 19 zzzzz-dz642-000000000000019 (Queued) "waiting while cluster is running at capacity: queue position 18"
+`
+       // sequence records, per container UUID, each distinct
+       // scheduling status observed while polling, in order. It is
+       // only logged (for the first five containers) to help debug
+       // failures.
+       sequence := make(map[string][]string)
+       var summary string
+       for deadline := time.Now().Add(time.Second); time.Now().Before(deadline); time.Sleep(time.Millisecond) {
+               cresp := getContainers()
+               summary = "\n"
+               for i, ent := range cresp.Items {
+                       summary += fmt.Sprintf("% 2d %s (%s) %q\n", i, ent.Container.UUID, ent.Container.State, ent.SchedulingStatus)
+                       seen := sequence[ent.Container.UUID]
+                       if len(seen) == 0 || seen[len(seen)-1] != ent.SchedulingStatus {
+                               sequence[ent.Container.UUID] = append(seen, ent.SchedulingStatus)
+                       }
+               }
+               if summary == expect {
+                       break
+               }
+       }
+       c.Check(summary, check.Equals, expect)
+       for i := 0; i < 5; i++ {
+               c.Logf("sequence for container %d:\n... %s", i, strings.Join(sequence[test.ContainerUUID(i)], "\n... "))
+       }
+}
+
+func (s *DispatcherSuite) TestManagementAPI_Instances(c *check.C) {
        s.cluster.ManagementToken = "abcdefgh"
        s.cluster.Containers.CloudVMs.TimeoutBooting = arvados.Duration(time.Second)
        Drivers["test"] = s.stubDriver
        s.disp.setupOnce.Do(s.disp.initialize)
-       s.disp.queue = &test.Queue{}
        go s.disp.run()
+       defer s.disp.Close()
 
        type instance struct {
                Instance             string
index 03fa592777e6fa7c09eb57031bb19c3bdeb80029..d2709722956cbf22b0b909c8e4fcf508a2ff4c0a 100644 (file)
@@ -5,6 +5,7 @@
 package scheduler
 
 import (
+       "fmt"
        "sort"
        "time"
 
@@ -15,6 +16,31 @@ import (
 
 var quietAfter503 = time.Minute
 
+// QueueEnt is a container queue entry (the embedded
+// container.QueueEnt) together with the scheduling status string
+// that runQueue assigns to it.
+type QueueEnt struct {
+       container.QueueEnt
+
+       // Human-readable scheduling status as of the last scheduling
+       // iteration. Serialized as "scheduling_status" in JSON.
+       SchedulingStatus string `json:"scheduling_status"`
+}
+
+// Scheduling status messages that runQueue stores in
+// QueueEnt.SchedulingStatus. Some are used as prefixes: the trailing
+// comments note what is appended to them at runtime.
+const (
+       schedStatusPreparingRuntimeEnvironment = "preparing runtime environment"
+       schedStatusPriorityZero                = "not scheduling: priority 0" // ", state X" appended at runtime
+       schedStatusContainerLimitReached       = "not starting: supervisor container limit has been reached"
+       schedStatusWaitingForPreviousAttempt   = "waiting for previous attempt to exit"
+       schedStatusWaitingNewInstance          = "waiting for new instance to be ready"
+       schedStatusWaitingInstanceType         = "waiting for suitable instance type to become available" // ": queue position X" appended at runtime
+       schedStatusWaitingCloudResources       = "waiting for cloud resources" // ": queue position X" appended at runtime
+       schedStatusWaitingClusterCapacity      = "waiting while cluster is running at capacity" // ": queue position X" appended at runtime
+)
+
+// Queue returns the sorted queue from the last scheduling iteration.
+//
+// The result is nil if lastQueue does not currently hold a
+// []QueueEnt (for example, if nothing has been stored yet).
+//
+// NOTE(review): the stored slice is returned as-is, without copying;
+// presumably callers must treat it as read-only -- confirm.
+func (sch *Scheduler) Queue() []QueueEnt {
+       // The "ok" result of the type assertion is deliberately
+       // discarded: on failure ents is simply a nil slice.
+       ents, _ := sch.lastQueue.Load().([]QueueEnt)
+       return ents
+}
+
 func (sch *Scheduler) runQueue() {
        running := sch.pool.Running()
        unalloc := sch.pool.Unallocated()
@@ -25,9 +51,9 @@ func (sch *Scheduler) runQueue() {
        }
 
        unsorted, _ := sch.queue.Entries()
-       sorted := make([]container.QueueEnt, 0, len(unsorted))
+       sorted := make([]QueueEnt, 0, len(unsorted))
        for _, ent := range unsorted {
-               sorted = append(sorted, ent)
+               sorted = append(sorted, QueueEnt{QueueEnt: ent})
        }
        sort.Slice(sorted, func(i, j int) bool {
                _, irunning := running[sorted[i].Container.UUID]
@@ -149,9 +175,9 @@ func (sch *Scheduler) runQueue() {
        }).Debug("runQueue")
 
        dontstart := map[arvados.InstanceType]bool{}
-       var atcapacity = map[string]bool{}    // ProviderTypes reported as AtCapacity during this runQueue() invocation
-       var overquota []container.QueueEnt    // entries that are unmappable because of worker pool quota
-       var overmaxsuper []container.QueueEnt // unmappable because max supervisors (these are not included in overquota)
+       var atcapacity = map[string]bool{} // ProviderTypes reported as AtCapacity during this runQueue() invocation
+       var overquota []QueueEnt           // entries that are unmappable because of worker pool quota
+       var overmaxsuper []QueueEnt        // unmappable because max supervisors (these are not included in overquota)
        var containerAllocatedWorkerBootingCount int
 
        // trying is #containers running + #containers we're trying to
@@ -159,6 +185,7 @@ func (sch *Scheduler) runQueue() {
        // reaches the dynamic maxConcurrency limit.
        trying := len(running)
 
+       qpos := 0
        supervisors := 0
 
 tryrun:
@@ -169,12 +196,20 @@ tryrun:
                })
                if ctr.SchedulingParameters.Supervisor {
                        supervisors += 1
-                       if maxSupervisors > 0 && supervisors > maxSupervisors {
-                               overmaxsuper = append(overmaxsuper, sorted[i])
-                               continue
+               }
+               if _, running := running[ctr.UUID]; running {
+                       if ctr.State == arvados.ContainerStateQueued || ctr.State == arvados.ContainerStateLocked {
+                               sorted[i].SchedulingStatus = schedStatusPreparingRuntimeEnvironment
                        }
+                       continue
                }
-               if _, running := running[ctr.UUID]; running || ctr.Priority < 1 {
+               if ctr.Priority < 1 {
+                       sorted[i].SchedulingStatus = schedStatusPriorityZero + ", state " + string(ctr.State)
+                       continue
+               }
+               if ctr.SchedulingParameters.Supervisor && maxSupervisors > 0 && supervisors > maxSupervisors {
+                       overmaxsuper = append(overmaxsuper, sorted[i])
+                       sorted[i].SchedulingStatus = schedStatusContainerLimitReached
                        continue
                }
                // If we have unalloc instances of any of the eligible
@@ -214,7 +249,7 @@ tryrun:
                        }
                        trying++
                        if !unallocOK && sch.pool.AtQuota() {
-                               logger.Trace("not locking: AtQuota and no unalloc workers")
+                               logger.Trace("not starting: AtQuota and no unalloc workers")
                                overquota = sorted[i:]
                                break tryrun
                        }
@@ -246,10 +281,13 @@ tryrun:
                                        // same instance type. Don't let this
                                        // one sneak in ahead of it.
                                } else if sch.pool.KillContainer(ctr.UUID, "about to start") {
+                                       sorted[i].SchedulingStatus = schedStatusWaitingForPreviousAttempt
                                        logger.Info("not restarting yet: crunch-run process from previous attempt has not exited")
                                } else if sch.pool.StartContainer(unallocType, ctr) {
+                                       sorted[i].SchedulingStatus = schedStatusPreparingRuntimeEnvironment
                                        logger.Trace("StartContainer => true")
                                } else {
+                                       sorted[i].SchedulingStatus = schedStatusWaitingNewInstance
                                        logger.Trace("StartContainer => false")
                                        containerAllocatedWorkerBootingCount += 1
                                        dontstart[unallocType] = true
@@ -279,6 +317,8 @@ tryrun:
                                // container A on the next call to
                                // runQueue(), rather than run
                                // container B now.
+                               qpos++
+                               sorted[i].SchedulingStatus = schedStatusWaitingInstanceType + fmt.Sprintf(": queue position %d", qpos)
                                logger.Trace("all eligible types at capacity")
                                continue
                        }
@@ -293,6 +333,7 @@ tryrun:
                        // asynchronously and does its own logging
                        // about the eventual outcome, so we don't
                        // need to.)
+                       sorted[i].SchedulingStatus = schedStatusWaitingNewInstance
                        logger.Info("creating new instance")
                        // Don't bother trying to start the container
                        // yet -- obviously the instance will take
@@ -305,12 +346,26 @@ tryrun:
        sch.mContainersAllocatedNotStarted.Set(float64(containerAllocatedWorkerBootingCount))
        sch.mContainersNotAllocatedOverQuota.Set(float64(len(overquota) + len(overmaxsuper)))
 
+       var qreason string
+       if sch.pool.AtQuota() {
+               qreason = schedStatusWaitingCloudResources
+       } else {
+               qreason = schedStatusWaitingClusterCapacity
+       }
+       for i, ent := range sorted {
+               if ent.SchedulingStatus == "" && (ent.Container.State == arvados.ContainerStateQueued || ent.Container.State == arvados.ContainerStateLocked) {
+                       qpos++
+                       sorted[i].SchedulingStatus = fmt.Sprintf("%s: queue position %d", qreason, qpos)
+               }
+       }
+       sch.lastQueue.Store(sorted)
+
        if len(overquota)+len(overmaxsuper) > 0 {
                // Unlock any containers that are unmappable while
                // we're at quota (but if they have already been
                // scheduled and they're loading docker images etc.,
                // let them run).
-               var unlock []container.QueueEnt
+               var unlock []QueueEnt
                unlock = append(unlock, overmaxsuper...)
                if totalInstances > 0 && len(overquota) > 1 {
                        // We don't unlock the next-in-line container
index ee7ab508839622af6f108cf6b98e6b48f6def603..bc6574a21a538134c618320f9e97511b84d9b307 100644 (file)
@@ -9,6 +9,7 @@ package scheduler
 import (
        "context"
        "sync"
+       "sync/atomic"
        "time"
 
        "git.arvados.org/arvados.git/sdk/go/arvados"
@@ -57,6 +58,8 @@ type Scheduler struct {
        mLongestWaitTimeSinceQueue       prometheus.Gauge
        mLast503Time                     prometheus.Gauge
        mMaxContainerConcurrency         prometheus.Gauge
+
+       lastQueue atomic.Value // stores a []QueueEnt
 }
 
 // New returns a new unstarted Scheduler.
index e7310818f7d745b55fda3ceed59bf2438bfab9e1..c3d0ea8aef676b3d3c57ce0bfbbcbe129b7689ac 100644 (file)
@@ -23,90 +23,91 @@ type APIEndpoint struct {
 }
 
 var (
-       EndpointConfigGet                     = APIEndpoint{"GET", "arvados/v1/config", ""}
-       EndpointVocabularyGet                 = APIEndpoint{"GET", "arvados/v1/vocabulary", ""}
-       EndpointDiscoveryDocument             = APIEndpoint{"GET", "discovery/v1/apis/arvados/v1/rest", ""}
-       EndpointLogin                         = APIEndpoint{"GET", "login", ""}
-       EndpointLogout                        = APIEndpoint{"GET", "logout", ""}
-       EndpointAuthorizedKeyCreate           = APIEndpoint{"POST", "arvados/v1/authorized_keys", "authorized_key"}
-       EndpointAuthorizedKeyUpdate           = APIEndpoint{"PATCH", "arvados/v1/authorized_keys/{uuid}", "authorized_key"}
-       EndpointAuthorizedKeyGet              = APIEndpoint{"GET", "arvados/v1/authorized_keys/{uuid}", ""}
-       EndpointAuthorizedKeyList             = APIEndpoint{"GET", "arvados/v1/authorized_keys", ""}
-       EndpointAuthorizedKeyDelete           = APIEndpoint{"DELETE", "arvados/v1/authorized_keys/{uuid}", ""}
-       EndpointCollectionCreate              = APIEndpoint{"POST", "arvados/v1/collections", "collection"}
-       EndpointCollectionUpdate              = APIEndpoint{"PATCH", "arvados/v1/collections/{uuid}", "collection"}
-       EndpointCollectionGet                 = APIEndpoint{"GET", "arvados/v1/collections/{uuid}", ""}
-       EndpointCollectionList                = APIEndpoint{"GET", "arvados/v1/collections", ""}
-       EndpointCollectionProvenance          = APIEndpoint{"GET", "arvados/v1/collections/{uuid}/provenance", ""}
-       EndpointCollectionUsedBy              = APIEndpoint{"GET", "arvados/v1/collections/{uuid}/used_by", ""}
-       EndpointCollectionDelete              = APIEndpoint{"DELETE", "arvados/v1/collections/{uuid}", ""}
-       EndpointCollectionTrash               = APIEndpoint{"POST", "arvados/v1/collections/{uuid}/trash", ""}
-       EndpointCollectionUntrash             = APIEndpoint{"POST", "arvados/v1/collections/{uuid}/untrash", ""}
-       EndpointSpecimenCreate                = APIEndpoint{"POST", "arvados/v1/specimens", "specimen"}
-       EndpointSpecimenUpdate                = APIEndpoint{"PATCH", "arvados/v1/specimens/{uuid}", "specimen"}
-       EndpointSpecimenGet                   = APIEndpoint{"GET", "arvados/v1/specimens/{uuid}", ""}
-       EndpointSpecimenList                  = APIEndpoint{"GET", "arvados/v1/specimens", ""}
-       EndpointSpecimenDelete                = APIEndpoint{"DELETE", "arvados/v1/specimens/{uuid}", ""}
-       EndpointContainerCreate               = APIEndpoint{"POST", "arvados/v1/containers", "container"}
-       EndpointContainerUpdate               = APIEndpoint{"PATCH", "arvados/v1/containers/{uuid}", "container"}
-       EndpointContainerPriorityUpdate       = APIEndpoint{"POST", "arvados/v1/containers/{uuid}/update_priority", "container"}
-       EndpointContainerGet                  = APIEndpoint{"GET", "arvados/v1/containers/{uuid}", ""}
-       EndpointContainerList                 = APIEndpoint{"GET", "arvados/v1/containers", ""}
-       EndpointContainerDelete               = APIEndpoint{"DELETE", "arvados/v1/containers/{uuid}", ""}
-       EndpointContainerLock                 = APIEndpoint{"POST", "arvados/v1/containers/{uuid}/lock", ""}
-       EndpointContainerUnlock               = APIEndpoint{"POST", "arvados/v1/containers/{uuid}/unlock", ""}
-       EndpointContainerSSH                  = APIEndpoint{"POST", "arvados/v1/containers/{uuid}/ssh", ""}
-       EndpointContainerSSHCompat            = APIEndpoint{"POST", "arvados/v1/connect/{uuid}/ssh", ""} // for compatibility with arvados <2.7
-       EndpointContainerGatewayTunnel        = APIEndpoint{"POST", "arvados/v1/containers/{uuid}/gateway_tunnel", ""}
-       EndpointContainerGatewayTunnelCompat  = APIEndpoint{"POST", "arvados/v1/connect/{uuid}/gateway_tunnel", ""} // for compatibility with arvados <2.7
-       EndpointContainerRequestCreate        = APIEndpoint{"POST", "arvados/v1/container_requests", "container_request"}
-       EndpointContainerRequestUpdate        = APIEndpoint{"PATCH", "arvados/v1/container_requests/{uuid}", "container_request"}
-       EndpointContainerRequestGet           = APIEndpoint{"GET", "arvados/v1/container_requests/{uuid}", ""}
-       EndpointContainerRequestList          = APIEndpoint{"GET", "arvados/v1/container_requests", ""}
-       EndpointContainerRequestDelete        = APIEndpoint{"DELETE", "arvados/v1/container_requests/{uuid}", ""}
-       EndpointContainerRequestLog           = APIEndpoint{"GET", "arvados/v1/container_requests/{uuid}/log{path:|/.*}", ""}
-       EndpointGroupCreate                   = APIEndpoint{"POST", "arvados/v1/groups", "group"}
-       EndpointGroupUpdate                   = APIEndpoint{"PATCH", "arvados/v1/groups/{uuid}", "group"}
-       EndpointGroupGet                      = APIEndpoint{"GET", "arvados/v1/groups/{uuid}", ""}
-       EndpointGroupList                     = APIEndpoint{"GET", "arvados/v1/groups", ""}
-       EndpointGroupContents                 = APIEndpoint{"GET", "arvados/v1/groups/contents", ""}
-       EndpointGroupContentsUUIDInPath       = APIEndpoint{"GET", "arvados/v1/groups/{uuid}/contents", ""} // Alternative HTTP route; client-side code should always use EndpointGroupContents instead
-       EndpointGroupShared                   = APIEndpoint{"GET", "arvados/v1/groups/shared", ""}
-       EndpointGroupDelete                   = APIEndpoint{"DELETE", "arvados/v1/groups/{uuid}", ""}
-       EndpointGroupTrash                    = APIEndpoint{"POST", "arvados/v1/groups/{uuid}/trash", ""}
-       EndpointGroupUntrash                  = APIEndpoint{"POST", "arvados/v1/groups/{uuid}/untrash", ""}
-       EndpointLinkCreate                    = APIEndpoint{"POST", "arvados/v1/links", "link"}
-       EndpointLinkUpdate                    = APIEndpoint{"PATCH", "arvados/v1/links/{uuid}", "link"}
-       EndpointLinkGet                       = APIEndpoint{"GET", "arvados/v1/links/{uuid}", ""}
-       EndpointLinkList                      = APIEndpoint{"GET", "arvados/v1/links", ""}
-       EndpointLinkDelete                    = APIEndpoint{"DELETE", "arvados/v1/links/{uuid}", ""}
-       EndpointLogCreate                     = APIEndpoint{"POST", "arvados/v1/logs", "log"}
-       EndpointLogUpdate                     = APIEndpoint{"PATCH", "arvados/v1/logs/{uuid}", "log"}
-       EndpointLogGet                        = APIEndpoint{"GET", "arvados/v1/logs/{uuid}", ""}
-       EndpointLogList                       = APIEndpoint{"GET", "arvados/v1/logs", ""}
-       EndpointLogDelete                     = APIEndpoint{"DELETE", "arvados/v1/logs/{uuid}", ""}
-       EndpointSysTrashSweep                 = APIEndpoint{"POST", "sys/trash_sweep", ""}
-       EndpointUserActivate                  = APIEndpoint{"POST", "arvados/v1/users/{uuid}/activate", ""}
-       EndpointUserCreate                    = APIEndpoint{"POST", "arvados/v1/users", "user"}
-       EndpointUserCurrent                   = APIEndpoint{"GET", "arvados/v1/users/current", ""}
-       EndpointUserDelete                    = APIEndpoint{"DELETE", "arvados/v1/users/{uuid}", ""}
-       EndpointUserGet                       = APIEndpoint{"GET", "arvados/v1/users/{uuid}", ""}
-       EndpointUserGetCurrent                = APIEndpoint{"GET", "arvados/v1/users/current", ""}
-       EndpointUserGetSystem                 = APIEndpoint{"GET", "arvados/v1/users/system", ""}
-       EndpointUserList                      = APIEndpoint{"GET", "arvados/v1/users", ""}
-       EndpointUserMerge                     = APIEndpoint{"POST", "arvados/v1/users/merge", ""}
-       EndpointUserSetup                     = APIEndpoint{"POST", "arvados/v1/users/setup", "user"}
-       EndpointUserSystem                    = APIEndpoint{"GET", "arvados/v1/users/system", ""}
-       EndpointUserUnsetup                   = APIEndpoint{"POST", "arvados/v1/users/{uuid}/unsetup", ""}
-       EndpointUserUpdate                    = APIEndpoint{"PATCH", "arvados/v1/users/{uuid}", "user"}
-       EndpointUserBatchUpdate               = APIEndpoint{"PATCH", "arvados/v1/users/batch_update", ""}
-       EndpointUserAuthenticate              = APIEndpoint{"POST", "arvados/v1/users/authenticate", ""}
-       EndpointAPIClientAuthorizationCurrent = APIEndpoint{"GET", "arvados/v1/api_client_authorizations/current", ""}
-       EndpointAPIClientAuthorizationCreate  = APIEndpoint{"POST", "arvados/v1/api_client_authorizations", "api_client_authorization"}
-       EndpointAPIClientAuthorizationUpdate  = APIEndpoint{"PUT", "arvados/v1/api_client_authorizations/{uuid}", "api_client_authorization"}
-       EndpointAPIClientAuthorizationList    = APIEndpoint{"GET", "arvados/v1/api_client_authorizations", ""}
-       EndpointAPIClientAuthorizationDelete  = APIEndpoint{"DELETE", "arvados/v1/api_client_authorizations/{uuid}", ""}
-       EndpointAPIClientAuthorizationGet     = APIEndpoint{"GET", "arvados/v1/api_client_authorizations/{uuid}", ""}
+       EndpointConfigGet                       = APIEndpoint{"GET", "arvados/v1/config", ""}
+       EndpointVocabularyGet                   = APIEndpoint{"GET", "arvados/v1/vocabulary", ""}
+       EndpointDiscoveryDocument               = APIEndpoint{"GET", "discovery/v1/apis/arvados/v1/rest", ""}
+       EndpointLogin                           = APIEndpoint{"GET", "login", ""}
+       EndpointLogout                          = APIEndpoint{"GET", "logout", ""}
+       EndpointAuthorizedKeyCreate             = APIEndpoint{"POST", "arvados/v1/authorized_keys", "authorized_key"}
+       EndpointAuthorizedKeyUpdate             = APIEndpoint{"PATCH", "arvados/v1/authorized_keys/{uuid}", "authorized_key"}
+       EndpointAuthorizedKeyGet                = APIEndpoint{"GET", "arvados/v1/authorized_keys/{uuid}", ""}
+       EndpointAuthorizedKeyList               = APIEndpoint{"GET", "arvados/v1/authorized_keys", ""}
+       EndpointAuthorizedKeyDelete             = APIEndpoint{"DELETE", "arvados/v1/authorized_keys/{uuid}", ""}
+       EndpointCollectionCreate                = APIEndpoint{"POST", "arvados/v1/collections", "collection"}
+       EndpointCollectionUpdate                = APIEndpoint{"PATCH", "arvados/v1/collections/{uuid}", "collection"}
+       EndpointCollectionGet                   = APIEndpoint{"GET", "arvados/v1/collections/{uuid}", ""}
+       EndpointCollectionList                  = APIEndpoint{"GET", "arvados/v1/collections", ""}
+       EndpointCollectionProvenance            = APIEndpoint{"GET", "arvados/v1/collections/{uuid}/provenance", ""}
+       EndpointCollectionUsedBy                = APIEndpoint{"GET", "arvados/v1/collections/{uuid}/used_by", ""}
+       EndpointCollectionDelete                = APIEndpoint{"DELETE", "arvados/v1/collections/{uuid}", ""}
+       EndpointCollectionTrash                 = APIEndpoint{"POST", "arvados/v1/collections/{uuid}/trash", ""}
+       EndpointCollectionUntrash               = APIEndpoint{"POST", "arvados/v1/collections/{uuid}/untrash", ""}
+       EndpointSpecimenCreate                  = APIEndpoint{"POST", "arvados/v1/specimens", "specimen"}
+       EndpointSpecimenUpdate                  = APIEndpoint{"PATCH", "arvados/v1/specimens/{uuid}", "specimen"}
+       EndpointSpecimenGet                     = APIEndpoint{"GET", "arvados/v1/specimens/{uuid}", ""}
+       EndpointSpecimenList                    = APIEndpoint{"GET", "arvados/v1/specimens", ""}
+       EndpointSpecimenDelete                  = APIEndpoint{"DELETE", "arvados/v1/specimens/{uuid}", ""}
+       EndpointContainerCreate                 = APIEndpoint{"POST", "arvados/v1/containers", "container"}
+       EndpointContainerUpdate                 = APIEndpoint{"PATCH", "arvados/v1/containers/{uuid}", "container"}
+       EndpointContainerPriorityUpdate         = APIEndpoint{"POST", "arvados/v1/containers/{uuid}/update_priority", "container"}
+       EndpointContainerGet                    = APIEndpoint{"GET", "arvados/v1/containers/{uuid}", ""}
+       EndpointContainerList                   = APIEndpoint{"GET", "arvados/v1/containers", ""}
+       EndpointContainerDelete                 = APIEndpoint{"DELETE", "arvados/v1/containers/{uuid}", ""}
+       EndpointContainerLock                   = APIEndpoint{"POST", "arvados/v1/containers/{uuid}/lock", ""}
+       EndpointContainerUnlock                 = APIEndpoint{"POST", "arvados/v1/containers/{uuid}/unlock", ""}
+       EndpointContainerSSH                    = APIEndpoint{"POST", "arvados/v1/containers/{uuid}/ssh", ""}
+       EndpointContainerSSHCompat              = APIEndpoint{"POST", "arvados/v1/connect/{uuid}/ssh", ""} // for compatibility with arvados <2.7
+       EndpointContainerGatewayTunnel          = APIEndpoint{"POST", "arvados/v1/containers/{uuid}/gateway_tunnel", ""}
+       EndpointContainerGatewayTunnelCompat    = APIEndpoint{"POST", "arvados/v1/connect/{uuid}/gateway_tunnel", ""} // for compatibility with arvados <2.7
+       EndpointContainerRequestCreate          = APIEndpoint{"POST", "arvados/v1/container_requests", "container_request"}
+       EndpointContainerRequestUpdate          = APIEndpoint{"PATCH", "arvados/v1/container_requests/{uuid}", "container_request"}
+       EndpointContainerRequestGet             = APIEndpoint{"GET", "arvados/v1/container_requests/{uuid}", ""}
+       EndpointContainerRequestList            = APIEndpoint{"GET", "arvados/v1/container_requests", ""}
+       EndpointContainerRequestDelete          = APIEndpoint{"DELETE", "arvados/v1/container_requests/{uuid}", ""}
+       EndpointContainerRequestContainerStatus = APIEndpoint{"GET", "arvados/v1/container_requests/{uuid}/container_status", ""}
+       EndpointContainerRequestLog             = APIEndpoint{"GET", "arvados/v1/container_requests/{uuid}/log{path:|/.*}", ""}
+       EndpointGroupCreate                     = APIEndpoint{"POST", "arvados/v1/groups", "group"}
+       EndpointGroupUpdate                     = APIEndpoint{"PATCH", "arvados/v1/groups/{uuid}", "group"}
+       EndpointGroupGet                        = APIEndpoint{"GET", "arvados/v1/groups/{uuid}", ""}
+       EndpointGroupList                       = APIEndpoint{"GET", "arvados/v1/groups", ""}
+       EndpointGroupContents                   = APIEndpoint{"GET", "arvados/v1/groups/contents", ""}
+       EndpointGroupContentsUUIDInPath         = APIEndpoint{"GET", "arvados/v1/groups/{uuid}/contents", ""} // Alternative HTTP route; client-side code should always use EndpointGroupContents instead
+       EndpointGroupShared                     = APIEndpoint{"GET", "arvados/v1/groups/shared", ""}
+       EndpointGroupDelete                     = APIEndpoint{"DELETE", "arvados/v1/groups/{uuid}", ""}
+       EndpointGroupTrash                      = APIEndpoint{"POST", "arvados/v1/groups/{uuid}/trash", ""}
+       EndpointGroupUntrash                    = APIEndpoint{"POST", "arvados/v1/groups/{uuid}/untrash", ""}
+       EndpointLinkCreate                      = APIEndpoint{"POST", "arvados/v1/links", "link"}
+       EndpointLinkUpdate                      = APIEndpoint{"PATCH", "arvados/v1/links/{uuid}", "link"}
+       EndpointLinkGet                         = APIEndpoint{"GET", "arvados/v1/links/{uuid}", ""}
+       EndpointLinkList                        = APIEndpoint{"GET", "arvados/v1/links", ""}
+       EndpointLinkDelete                      = APIEndpoint{"DELETE", "arvados/v1/links/{uuid}", ""}
+       EndpointLogCreate                       = APIEndpoint{"POST", "arvados/v1/logs", "log"}
+       EndpointLogUpdate                       = APIEndpoint{"PATCH", "arvados/v1/logs/{uuid}", "log"}
+       EndpointLogGet                          = APIEndpoint{"GET", "arvados/v1/logs/{uuid}", ""}
+       EndpointLogList                         = APIEndpoint{"GET", "arvados/v1/logs", ""}
+       EndpointLogDelete                       = APIEndpoint{"DELETE", "arvados/v1/logs/{uuid}", ""}
+       EndpointSysTrashSweep                   = APIEndpoint{"POST", "sys/trash_sweep", ""}
+       EndpointUserActivate                    = APIEndpoint{"POST", "arvados/v1/users/{uuid}/activate", ""}
+       EndpointUserCreate                      = APIEndpoint{"POST", "arvados/v1/users", "user"}
+       EndpointUserCurrent                     = APIEndpoint{"GET", "arvados/v1/users/current", ""}
+       EndpointUserDelete                      = APIEndpoint{"DELETE", "arvados/v1/users/{uuid}", ""}
+       EndpointUserGet                         = APIEndpoint{"GET", "arvados/v1/users/{uuid}", ""}
+       EndpointUserGetCurrent                  = APIEndpoint{"GET", "arvados/v1/users/current", ""}
+       EndpointUserGetSystem                   = APIEndpoint{"GET", "arvados/v1/users/system", ""}
+       EndpointUserList                        = APIEndpoint{"GET", "arvados/v1/users", ""}
+       EndpointUserMerge                       = APIEndpoint{"POST", "arvados/v1/users/merge", ""}
+       EndpointUserSetup                       = APIEndpoint{"POST", "arvados/v1/users/setup", "user"}
+       EndpointUserSystem                      = APIEndpoint{"GET", "arvados/v1/users/system", ""}
+       EndpointUserUnsetup                     = APIEndpoint{"POST", "arvados/v1/users/{uuid}/unsetup", ""}
+       EndpointUserUpdate                      = APIEndpoint{"PATCH", "arvados/v1/users/{uuid}", "user"}
+       EndpointUserBatchUpdate                 = APIEndpoint{"PATCH", "arvados/v1/users/batch_update", ""}
+       EndpointUserAuthenticate                = APIEndpoint{"POST", "arvados/v1/users/authenticate", ""}
+       EndpointAPIClientAuthorizationCurrent   = APIEndpoint{"GET", "arvados/v1/api_client_authorizations/current", ""}
+       EndpointAPIClientAuthorizationCreate    = APIEndpoint{"POST", "arvados/v1/api_client_authorizations", "api_client_authorization"}
+       EndpointAPIClientAuthorizationUpdate    = APIEndpoint{"PUT", "arvados/v1/api_client_authorizations/{uuid}", "api_client_authorization"}
+       EndpointAPIClientAuthorizationList      = APIEndpoint{"GET", "arvados/v1/api_client_authorizations", ""}
+       EndpointAPIClientAuthorizationDelete    = APIEndpoint{"DELETE", "arvados/v1/api_client_authorizations/{uuid}", ""}
+       EndpointAPIClientAuthorizationGet       = APIEndpoint{"GET", "arvados/v1/api_client_authorizations/{uuid}", ""}
 )
 
 type ContainerSSHOptions struct {
@@ -310,6 +311,7 @@ type API interface {
        ContainerRequestGet(ctx context.Context, options GetOptions) (ContainerRequest, error)
        ContainerRequestList(ctx context.Context, options ListOptions) (ContainerRequestList, error)
        ContainerRequestDelete(ctx context.Context, options DeleteOptions) (ContainerRequest, error)
+       ContainerRequestContainerStatus(ctx context.Context, options GetOptions) (ContainerStatus, error)
        ContainerRequestLog(ctx context.Context, options ContainerLogOptions) (http.Handler, error)
        GroupCreate(ctx context.Context, options CreateOptions) (Group, error)
        GroupUpdate(ctx context.Context, options UpdateOptions) (Group, error)
index 2467e807a1253e2764ae657bb0ce78ee10399ee1..91c8fbfe2936d972b8c5f196467072a9d7715b84 100644 (file)
@@ -160,3 +160,9 @@ const (
        ContainerRequestStateCommitted  = ContainerRequestState("Committed")
        ContainerRequestStateFinal      = ContainerRequestState("Final")
 )
+
+// ContainerStatus is the result type of the
+// ContainerRequestContainerStatus API method. Note that State is
+// serialized under the JSON key "container_state", not "state".
+type ContainerStatus struct {
+       UUID             string         `json:"uuid"`
+       State            ContainerState `json:"container_state"`
+       SchedulingStatus string         `json:"scheduling_status"`
+}
index 3ba794380f6f9ffd4fc755fdd732c82a3a2725fd..e1827b5d1f7995e3c3e01baa52ef016f349dcd95 100644 (file)
@@ -168,6 +168,10 @@ func (as *APIStub) ContainerRequestDelete(ctx context.Context, options arvados.D
        as.appendCall(ctx, as.ContainerRequestDelete, options)
        return arvados.ContainerRequest{}, as.Error
 }
+// ContainerRequestContainerStatus passes the call and its options to
+// appendCall, then returns a zero-value arvados.ContainerStatus
+// along with the stub's Error field.
+func (as *APIStub) ContainerRequestContainerStatus(ctx context.Context, options arvados.GetOptions) (arvados.ContainerStatus, error) {
+       as.appendCall(ctx, as.ContainerRequestContainerStatus, options)
+       return arvados.ContainerStatus{}, as.Error
+}
 func (as *APIStub) ContainerRequestLog(ctx context.Context, options arvados.ContainerLogOptions) (http.Handler, error) {
        as.appendCall(ctx, as.ContainerRequestLog, options)
        // Return a handler that responds with the configured
index 6793893ff1204d6d3bf6ce98c76d00929ccfc055..232c88d0678eff48e64c366f386f549e62ed9c74 100644 (file)
             "https://api.arvados.org/auth/arvados"
           ]
         },
+        "container_status": {
+          "id": "arvados.container_requests.container_status",
+          "path": "container_requests/{uuid}/container_status",
+          "httpMethod": "GET",
+          "description": "container_status container_requests",
+          "parameters": {
+            "uuid": {
+              "type": "string",
+              "required": true,
+              "description": "The UUID of the ContainerRequest in question.",
+              "location": "query"
+            }
+          },
+          "response": {
+            "$ref": "ContainerRequest"
+          },
+          "scopes": [
+            "https://api.arvados.org/auth/arvados"
+          ]
+        },
         "list": {
           "id": "arvados.container_requests.list",
           "path": "container_requests",
index 917c876706ffb2bcab4189d5fca1da430b37a1fc..88a916e659e54643468536a11c94474ccb2ee3d0 100644 (file)
@@ -162,7 +162,7 @@ class EventClient(threading.Thread):
           disconnecting. Default 1000.
 
         * reason: str --- The WebSocket close reason sent to the server when
-          disconnecting. Default is the empty string.
+          disconnecting. Default is an empty string.
 
         * timeout: float --- How long to wait for the WebSocket server to
           acknowledge the disconnection, in seconds. Default 0, which means
index 6b6e96a1f71cb4f86444c74d720a8eb19711004a..f99a0a55a92671c0d455b6704f733551658a7fb4 100644 (file)
@@ -31,6 +31,22 @@ class Arvados::V1::ContainerRequestsController < ApplicationController
       })
   end
 
+  # Parameter declarations for the container_status action: whatever
+  # the superclass declares, plus a required string "uuid".
+  def self._container_status_requires_parameters
+    # Fall back to an empty hash if calling super raises (e.g., the
+    # superclass does not define this method).
+    (super rescue {}).
+      merge({
+        uuid: {
+          type: 'string', required: true, description: "The UUID of the ContainerRequest in question.",
+        },
+      })
+  end
+
+  # This API is handled entirely by controller, so this method is
+  # never called -- it's only here for the sake of adding the API to
+  # the generated discovery document.
+  #
+  # If a request does reach this method anyway, it responds with
+  # HTTP status 400 and a JSON "errors" message saying so.
+  def container_status
+    send_json({"errors" => "controller-only API, not handled by rails"}, status: 400)
+  end
+
   def update
     if (resource_attrs.keys.map(&:to_sym) - [:owner_uuid, :name, :description, :properties]).empty? or @object.container_uuid.nil?
       # If no attributes are being updated besides these, there are no
index 87e2737575675e2d37fc9c2b778771be89001193..b87e86f664de7e3230331e8233744ac589e4a169 100644 (file)
@@ -44,7 +44,9 @@ Rails.application.routes.draw do
         get 'secret_mounts', on: :member
         get 'current', on: :collection
       end
-      resources :container_requests
+      resources :container_requests do
+        get 'container_status', on: :member
+      end
       resources :jobs do
         get 'queue', on: :collection
         get 'queue_size', on: :collection
index 0a434ec9e86034c10e6baa64938551cb089bce4f..1a93d83f3138057954ed79c650e8e24ac1a69d2c 100644 (file)
@@ -72,7 +72,7 @@ describe('Banner / tooltip tests', function () {
 
         cy.get('[data-cy=confirmation-dialog-ok-btn]').click();
 
-        cy.get('[title=Notifications]').click();
+        cy.get('[title=Notifications]').click({ force: true });
         cy.get('li').contains('Restore Banner').click();
 
         cy.get('[data-cy=confirmation-dialog-ok-btn]').should('be.visible');
index 4ecbc7e10b23a832f327d7800d20a2552e1a1824..91b06c2b2f0a9e22d0d407422d4c2b0fc4636729 100644 (file)
@@ -903,7 +903,6 @@ const _resourceWithName = withStyles(
             <Typography
                 style={{ color: theme.palette.primary.main }}
                 inline
-                noWrap
             >
                 {uuid}
             </Typography>
@@ -914,7 +913,6 @@ const _resourceWithName = withStyles(
         <Typography
             style={{ color: theme.palette.primary.main }}
             inline
-            noWrap
         >
             {userFullname} ({uuid})
         </Typography>
index 216066530ee15ac0fc2a899ec4cfcea7c2769f4c..006759df981c80412c9f67d9244995151776d628 100755 (executable)
@@ -14,4 +14,4 @@ run_bundler --binstubs=binstubs
 cd /usr/src/arvados/sdk/cli
 run_bundler --binstubs=binstubs
 
-pip_install_sdist sdk/python services/fuse sdk/cwl
+pip_install_sdist sdk/python services/fuse tools/crunchstat-summary sdk/cwl