projects
/
arvados.git
/ blobdiff
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
17417: Merge branch 'main' into 17417-add-arm64
[arvados.git]
/
lib
/
dispatchcloud
/
worker
/
worker.go
diff --git
a/lib/dispatchcloud/worker/worker.go
b/lib/dispatchcloud/worker/worker.go
index 517a5d193e328b8f9f2ae2f1ce9d9b4db718ddf6..9e89d7daafc01d05b770fb065f88049dea231a7e 100644
(file)
--- a/
lib/dispatchcloud/worker/worker.go
+++ b/
lib/dispatchcloud/worker/worker.go
@@
-177,6
+177,9
@@
func (wkr *worker) startContainer(ctr arvados.Container) {
}
go func() {
rr.Start()
}
go func() {
rr.Start()
+ if wkr.wp.mTimeFromQueueToCrunchRun != nil {
+ wkr.wp.mTimeFromQueueToCrunchRun.Observe(time.Since(ctr.CreatedAt).Seconds())
+ }
wkr.mtx.Lock()
defer wkr.mtx.Unlock()
now := time.Now()
wkr.mtx.Lock()
defer wkr.mtx.Unlock()
now := time.Now()
@@
-189,7
+192,7
@@
func (wkr *worker) startContainer(ctr arvados.Container) {
}
// ProbeAndUpdate conducts appropriate boot/running probes (if any)
}
// ProbeAndUpdate conducts appropriate boot/running probes (if any)
-// for the worker's curent state. If a previous probe is still
+// for the worker's current state. If a previous probe is still
// running, it does nothing.
//
// It should be called in a new goroutine.
// running, it does nothing.
//
// It should be called in a new goroutine.
@@
-373,6
+376,7
@@
func (wkr *worker) probeRunning() (running []string, reportsBroken, ok bool) {
if u := wkr.instance.RemoteUser(); u != "root" {
cmd = "sudo " + cmd
}
if u := wkr.instance.RemoteUser(); u != "root" {
cmd = "sudo " + cmd
}
+ before := time.Now()
stdout, stderr, err := wkr.executor.Execute(nil, cmd, nil)
if err != nil {
wkr.logger.WithFields(logrus.Fields{
stdout, stderr, err := wkr.executor.Execute(nil, cmd, nil)
if err != nil {
wkr.logger.WithFields(logrus.Fields{
@@
-380,8
+384,10
@@
func (wkr *worker) probeRunning() (running []string, reportsBroken, ok bool) {
"stdout": string(stdout),
"stderr": string(stderr),
}).WithError(err).Warn("probe failed")
"stdout": string(stdout),
"stderr": string(stderr),
}).WithError(err).Warn("probe failed")
+ wkr.wp.mRunProbeDuration.WithLabelValues("fail").Observe(time.Now().Sub(before).Seconds())
return
}
return
}
+ wkr.wp.mRunProbeDuration.WithLabelValues("success").Observe(time.Now().Sub(before).Seconds())
ok = true
staleRunLock := false
ok = true
staleRunLock := false