16636: implement review comments.
author Ward Vandewege <ward@curii.com>
Mon, 3 Aug 2020 18:37:22 +0000 (14:37 -0400)
committer Ward Vandewege <ward@curii.com>
Mon, 3 Aug 2020 18:37:22 +0000 (14:37 -0400)
Arvados-DCO-1.1-Signed-off-by: Ward Vandewege <ward@curii.com>

lib/dispatchcloud/worker/pool.go
lib/dispatchcloud/worker/worker.go

index bcf35e285486d817000ee07ef825b185a8489b84..efcc102e825bc9157ffe37ccb7e220c88c246e5f 100644 (file)
@@ -437,6 +437,7 @@ func (wp *Pool) Shutdown(it arvados.InstanceType) bool {
                for _, wkr := range wp.workers {
                        if wkr.idleBehavior != IdleBehaviorHold && wkr.state == tryState && wkr.instType == it {
                                logger.WithField("Instance", wkr.instance.ID()).Info("shutting down")
+                               wkr.reportBootOutcome(BootOutcomeAborted)
                                wkr.shutdown()
                                return true
                        }
@@ -609,7 +610,7 @@ func (wp *Pool) registerMetrics(reg *prometheus.Registry) {
                Subsystem: "dispatchcloud",
                Name:      "instances_disappeared",
                Help:      "Number of occurrences of an instance disappearing from the cloud provider's list of instances.",
-       }, []string{"state"})
+       }, []string{"outcome"})
        for _, v := range stateString {
                wp.mDisappearances.WithLabelValues(v).Add(0)
        }
@@ -776,6 +777,7 @@ func (wp *Pool) KillInstance(id cloud.InstanceID, reason string) error {
                return errors.New("instance not found")
        }
        wkr.logger.WithField("Reason", reason).Info("shutting down")
+       wkr.reportBootOutcome(BootOutcomeAborted)
        wkr.shutdown()
        return nil
 }
index 6878bb0655ea1e3bc1401396f0c9cbfe4ad9bba0..5d2360f3ccc64671b7193b281a7807d7b70de23b 100644 (file)
@@ -43,33 +43,6 @@ var stateString = map[State]string{
        StateShutdown: "shutdown",
 }
 
-// BootOutcome is the result of a worker boot. It is used as a label in a metric.
-type BootOutcome string
-
-const (
-       BootOutcomeFailed       BootOutcome = "failure"
-       BootOutcomeSucceeded    BootOutcome = "success"
-       BootOutcomeIdleShutdown BootOutcome = "idle shutdown"
-       BootOutcomeDisappeared  BootOutcome = "disappeared"
-)
-
-var validBootOutcomes = map[BootOutcome]bool{
-       BootOutcomeFailed:       true,
-       BootOutcomeSucceeded:    true,
-       BootOutcomeIdleShutdown: true,
-       BootOutcomeDisappeared:  true,
-}
-
-func (wkr *worker) reportBootOutcome(outcome BootOutcome) {
-       if wkr.bootOutcomeReported {
-               return
-       }
-       if wkr.wp.mBootOutcomes != nil {
-               wkr.wp.mBootOutcomes.WithLabelValues(string(outcome)).Inc()
-       }
-       wkr.bootOutcomeReported = true
-}
-
 // String implements fmt.Stringer.
 func (s State) String() string {
        return stateString[s]
@@ -81,6 +54,23 @@ func (s State) MarshalText() ([]byte, error) {
        return []byte(stateString[s]), nil
 }
 
+// BootOutcome is the result of a worker boot. It is used as a label in a metric.
+type BootOutcome string
+
+const (
+       BootOutcomeFailed      BootOutcome = "failure"
+       BootOutcomeSucceeded   BootOutcome = "success"
+       BootOutcomeAborted     BootOutcome = "aborted"
+       BootOutcomeDisappeared BootOutcome = "disappeared"
+)
+
+var validBootOutcomes = map[BootOutcome]bool{
+       BootOutcomeFailed:      true,
+       BootOutcomeSucceeded:   true,
+       BootOutcomeAborted:     true,
+       BootOutcomeDisappeared: true,
+}
+
 // IdleBehavior indicates the behavior desired when a node becomes idle.
 type IdleBehavior string
 
@@ -139,6 +129,17 @@ func (wkr *worker) onKilled(uuid string) {
        go wkr.wp.notify()
 }
 
+// caller must have lock.
+func (wkr *worker) reportBootOutcome(outcome BootOutcome) {
+       if wkr.bootOutcomeReported {
+               return
+       }
+       if wkr.wp.mBootOutcomes != nil {
+               wkr.wp.mBootOutcomes.WithLabelValues(string(outcome)).Inc()
+       }
+       wkr.bootOutcomeReported = true
+}
+
 // caller must have lock.
 func (wkr *worker) setIdleBehavior(idleBehavior IdleBehavior) {
        wkr.logger.WithField("IdleBehavior", idleBehavior).Info("set idle behavior")
@@ -499,7 +500,7 @@ func (wkr *worker) shutdownIfIdle() bool {
                "IdleDuration": stats.Duration(time.Since(wkr.busy)),
                "IdleBehavior": wkr.idleBehavior,
        }).Info("shutdown worker")
-       wkr.reportBootOutcome(BootOutcomeIdleShutdown)
+       wkr.reportBootOutcome(BootOutcomeAborted)
        wkr.shutdown()
        return true
 }