X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/7499f61a2912cfdb1a316808fafa6e6ee77ee2e0..48697cdb28fca37f1e420855a1bf1af2446184c7:/lib/dispatchcloud/worker/worker.go diff --git a/lib/dispatchcloud/worker/worker.go b/lib/dispatchcloud/worker/worker.go index 9e89d7daaf..b2ed6c2bff 100644 --- a/lib/dispatchcloud/worker/worker.go +++ b/lib/dispatchcloud/worker/worker.go @@ -6,7 +6,9 @@ package worker import ( "bytes" + "encoding/json" "fmt" + "io" "path/filepath" "strings" "sync" @@ -313,6 +315,10 @@ func (wkr *worker) probeAndUpdate() { // not yet running when ctrUUIDs was generated. Leave // wkr.running alone and wait for the next probe to // catch up on any changes. + logger.WithFields(logrus.Fields{ + "updated": updated, + "wkr.updated": wkr.updated, + }).Debug("skipping worker state update due to probe/sync race") return } @@ -377,7 +383,12 @@ func (wkr *worker) probeRunning() (running []string, reportsBroken, ok bool) { cmd = "sudo " + cmd } before := time.Now() - stdout, stderr, err := wkr.executor.Execute(nil, cmd, nil) + var stdin io.Reader + if prices := wkr.instance.PriceHistory(wkr.instType); len(prices) > 0 { + j, _ := json.Marshal(prices) + stdin = bytes.NewReader(j) + } + stdout, stderr, err := wkr.executor.Execute(nil, cmd, stdin) if err != nil { wkr.logger.WithFields(logrus.Fields{ "Command": cmd, @@ -387,6 +398,11 @@ func (wkr *worker) probeRunning() (running []string, reportsBroken, ok bool) { wkr.wp.mRunProbeDuration.WithLabelValues("fail").Observe(time.Now().Sub(before).Seconds()) return } + wkr.logger.WithFields(logrus.Fields{ + "Command": cmd, + "stdout": string(stdout), + "stderr": string(stderr), + }).Debug("probe succeeded") wkr.wp.mRunProbeDuration.WithLabelValues("success").Observe(time.Now().Sub(before).Seconds()) ok = true @@ -409,6 +425,12 @@ func (wkr *worker) probeRunning() (running []string, reportsBroken, ok bool) { // empty string following final newline } else if s == "broken" { reportsBroken = true + } else if !strings.HasPrefix(s, wkr.wp.cluster.ClusterID) { + // Ignore crunch-run processes that belong to + // a different cluster (e.g., a single host + // running multiple clusters with the loopback + // driver) + continue } else if toks := strings.Split(s, " "); len(toks) == 1 { running = append(running, s) } else if toks[1] == "stale" {