projects
/
arvados.git
/ blobdiff
summary
|
shortlog
|
log
|
commit
|
commitdiff
|
tree
raw
|
inline
| side by side
Allow multiple clusters to use loopback driver on same host.
[arvados.git]
/
lib
/
dispatchcloud
/
worker
/
worker.go
diff --git
a/lib/dispatchcloud/worker/worker.go
b/lib/dispatchcloud/worker/worker.go
index 9e89d7daafc01d05b770fb065f88049dea231a7e..b01a820cd619b172538b725d689d0323897611d5 100644
(file)
--- a/
lib/dispatchcloud/worker/worker.go
+++ b/
lib/dispatchcloud/worker/worker.go
@@
-313,6
+313,10
@@
func (wkr *worker) probeAndUpdate() {
// not yet running when ctrUUIDs was generated. Leave
// wkr.running alone and wait for the next probe to
// catch up on any changes.
// not yet running when ctrUUIDs was generated. Leave
// wkr.running alone and wait for the next probe to
// catch up on any changes.
+ logger.WithFields(logrus.Fields{
+ "updated": updated,
+ "wkr.updated": wkr.updated,
+ }).Debug("skipping worker state update due to probe/sync race")
return
}
return
}
@@
-387,6
+391,11
@@
func (wkr *worker) probeRunning() (running []string, reportsBroken, ok bool) {
wkr.wp.mRunProbeDuration.WithLabelValues("fail").Observe(time.Now().Sub(before).Seconds())
return
}
wkr.wp.mRunProbeDuration.WithLabelValues("fail").Observe(time.Now().Sub(before).Seconds())
return
}
+ wkr.logger.WithFields(logrus.Fields{
+ "Command": cmd,
+ "stdout": string(stdout),
+ "stderr": string(stderr),
+ }).Debug("probe succeeded")
wkr.wp.mRunProbeDuration.WithLabelValues("success").Observe(time.Now().Sub(before).Seconds())
ok = true
wkr.wp.mRunProbeDuration.WithLabelValues("success").Observe(time.Now().Sub(before).Seconds())
ok = true
@@
-409,6
+418,12
@@
func (wkr *worker) probeRunning() (running []string, reportsBroken, ok bool) {
// empty string following final newline
} else if s == "broken" {
reportsBroken = true
// empty string following final newline
} else if s == "broken" {
reportsBroken = true
+ } else if !strings.HasPrefix(s, wkr.wp.cluster.ClusterID) {
+ // Ignore crunch-run processes that belong to
+ // a different cluster (e.g., a single host
+ // running multiple clusters with the loopback
+ // driver)
+ continue
} else if toks := strings.Split(s, " "); len(toks) == 1 {
running = append(running, s)
} else if toks[1] == "stale" {
} else if toks := strings.Split(s, " "); len(toks) == 1 {
running = append(running, s)
} else if toks[1] == "stale" {