17170: Add "arvados-client shell" subcommand and backend support.
[arvados.git] / lib / dispatchcloud / worker / pool.go
index c6eaeae2b618b11423512cc2be1cb5cfc454c20c..e092e7adab5bfe0e3d9b5ff354ea4ff7ff371010 100644 (file)
@@ -5,8 +5,10 @@
 package worker
 
 import (
+       "crypto/hmac"
        "crypto/md5"
        "crypto/rand"
+       "crypto/sha256"
        "errors"
        "fmt"
        "io"
@@ -86,9 +88,8 @@ const (
 func duration(conf arvados.Duration, def time.Duration) time.Duration {
        if conf > 0 {
                return time.Duration(conf)
-       } else {
-               return def
        }
+       return def
 }
 
 // NewPool creates a Pool of workers backed by instanceSet.
@@ -117,6 +118,7 @@ func NewPool(logger logrus.FieldLogger, arvClient *arvados.Client, reg *promethe
                timeoutTERM:                    duration(cluster.Containers.CloudVMs.TimeoutTERM, defaultTimeoutTERM),
                timeoutSignal:                  duration(cluster.Containers.CloudVMs.TimeoutSignal, defaultTimeoutSignal),
                timeoutStaleRunLock:            duration(cluster.Containers.CloudVMs.TimeoutStaleRunLock, defaultTimeoutStaleRunLock),
+               systemRootToken:                cluster.SystemRootToken,
                installPublicKey:               installPublicKey,
                tagKeyPrefix:                   cluster.Containers.CloudVMs.TagKeyPrefix,
                stop:                           make(chan bool),
@@ -155,6 +157,7 @@ type Pool struct {
        timeoutTERM                    time.Duration
        timeoutSignal                  time.Duration
        timeoutStaleRunLock            time.Duration
+       systemRootToken                string
        installPublicKey               ssh.PublicKey
        tagKeyPrefix                   string
 
@@ -184,6 +187,7 @@ type Pool struct {
        mTimeToReadyForContainer  prometheus.Summary
        mTimeFromShutdownToGone   prometheus.Summary
        mTimeFromQueueToCrunchRun prometheus.Summary
+       mRunProbeDuration         *prometheus.SummaryVec
 }
 
 type createCall struct {
@@ -682,6 +686,14 @@ func (wp *Pool) registerMetrics(reg *prometheus.Registry) {
                Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.005, 0.99: 0.001},
        })
        reg.MustRegister(wp.mTimeFromQueueToCrunchRun)
+       wp.mRunProbeDuration = prometheus.NewSummaryVec(prometheus.SummaryOpts{
+               Namespace:  "arvados",
+               Subsystem:  "dispatchcloud",
+               Name:       "instances_run_probe_duration_seconds",
+               Help:       "Number of seconds per runProbe call.",
+               Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.95: 0.005, 0.99: 0.001},
+       }, []string{"outcome"})
+       reg.MustRegister(wp.mRunProbeDuration)
 }
 
 func (wp *Pool) runMetrics() {
@@ -982,6 +994,12 @@ func (wp *Pool) waitUntilLoaded() {
        }
 }
 
+// gatewayAuthSecret returns the hex-encoded HMAC-SHA256 of uuid,
+// keyed with the pool's systemRootToken.
+//
+// NOTE(review): any peer that derives or verifies this secret must
+// hash exactly the same bytes (the bare uuid, nothing else) --
+// confirm against the gateway side.
+func (wp *Pool) gatewayAuthSecret(uuid string) string {
+       h := hmac.New(sha256.New, []byte(wp.systemRootToken))
+       // Fprint does no formatting, so a "%s" operand would be
+       // hashed literally along with the uuid; write only the uuid.
+       // hash.Hash.Write never returns an error, so the result is
+       // not checked.
+       fmt.Fprint(h, uuid)
+       return fmt.Sprintf("%x", h.Sum(nil))
+}
+
 // Return a random string of n hexadecimal digits (n*4 random bits). n
 // must be even.
 func randomHex(n int) string {