18298: Request pend_reason field when polling bjobs.
author Tom Clegg <tom@curii.com>
Wed, 17 Nov 2021 21:42:13 +0000 (16:42 -0500)
committer Tom Clegg <tom@curii.com>
Wed, 17 Nov 2021 21:42:13 +0000 (16:42 -0500)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

lib/lsf/dispatch_test.go
lib/lsf/lsfcli.go
lib/lsf/lsfqueue.go

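diff --git a/lib/lsf/dispatch_test.go b/lib/lsf/dispatch_test.go
index 641453e5480ced43609efcb499d5dbff61383cae..bb3b1b9df6439de040b651728ce3c730289758c6 100644
--- a/lib/lsf/dispatch_test.go
+++ b/lib/lsf/dispatch_test.go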
@@ -6,6 +6,7 @@ package lsf
 
 import (
        "context"
+       "encoding/json"
        "fmt"
        "math/rand"
        "os/exec"
@@ -103,13 +104,26 @@ func (stub lsfstub) stubCommand(s *suite, c *check.C) func(prog string, args ...
                        }
                        return exec.Command("echo", "submitted job")
                case "bjobs":
-                       c.Check(args, check.DeepEquals, []string{"-u", "all", "-noheader", "-o", "jobid stat job_name:30"})
-                       out := ""
+                       c.Check(args, check.DeepEquals, []string{"-u", "all", "-o", "jobid stat job_name pend_reason", "-json"})
+                       var records []map[string]interface{}
                        for jobid, uuid := range fakejobq {
-                               out += fmt.Sprintf(`%d %s %s\n`, jobid, "RUN", uuid)
+                               records = append(records, map[string]interface{}{
+                                       "JOBID":       fmt.Sprintf("%d", jobid),
+                                       "STAT":        "RUN",
+                                       "JOB_NAME":    uuid,
+                                       "PEND_REASON": "",
+                               })
                        }
-                       c.Logf("bjobs out: %q", out)
-                       return exec.Command("printf", out)
+                       out, err := json.Marshal(map[string]interface{}{
+                               "COMMAND": "bjobs",
+                               "JOBS":    len(fakejobq),
+                               "RECORDS": records,
+                       })
+                       if err != nil {
+                               panic(err)
+                       }
+                       c.Logf("bjobs out: %s", out)
+                       return exec.Command("printf", string(out))
                case "bkill":
                        killid, _ := strconv.Atoi(args[0])
                        if uuid, ok := fakejobq[killid]; !ok {
diff --git a/lib/lsf/lsfcli.go b/lib/lsf/lsfcli.go
index 9d712ee97fa02013eff3035edc0cdb7cd43fab64..d17559568c13bbadbee2da66c6c443b99ef552f1 100644
--- a/lib/lsf/lsfcli.go
+++ b/lib/lsf/lsfcli.go
@@ -6,6 +6,7 @@ package lsf
 
 import (
        "bytes"
+       "encoding/json"
        "fmt"
        "os"
        "os/exec"
@@ -16,9 +17,10 @@ import (
 )
 
 type bjobsEntry struct {
-       id   int
-       name string
-       stat string
+       ID         string `json:"JOBID"`
+       Name       string `json:"JOB_NAME"`
+       Stat       string `json:"STAT"`
+       PendReason string `json:"PEND_REASON"`
 }
 
 type lsfcli struct {
@@ -53,29 +55,21 @@ func (cli lsfcli) Bsub(script []byte, args []string, arv *arvados.Client) error
 
 func (cli lsfcli) Bjobs() ([]bjobsEntry, error) {
        cli.logger.Debugf("Bjobs()")
-       cmd := cli.command("bjobs", "-u", "all", "-noheader", "-o", "jobid stat job_name:30")
+       cmd := cli.command("bjobs", "-u", "all", "-o", "jobid stat job_name pend_reason", "-json")
        buf, err := cmd.Output()
        if err != nil {
                return nil, errWithStderr(err)
        }
-       var bjobs []bjobsEntry
-       for _, line := range strings.Split(string(buf), "\n") {
-               if line == "" {
-                       continue
-               }
-               var ent bjobsEntry
-               if _, err := fmt.Sscan(line, &ent.id, &ent.stat, &ent.name); err != nil {
-                       cli.logger.Warnf("ignoring unparsed line in bjobs output: %q", line)
-                       continue
-               }
-               bjobs = append(bjobs, ent)
+       var resp struct {
+               Records []bjobsEntry `json:"RECORDS"`
        }
-       return bjobs, nil
+       err = json.Unmarshal(buf, &resp)
+       return resp.Records, err
 }
 
-func (cli lsfcli) Bkill(id int) error {
-       cli.logger.Infof("Bkill(%d)", id)
-       cmd := cli.command("bkill", fmt.Sprintf("%d", id))
+func (cli lsfcli) Bkill(id string) error {
+       cli.logger.Infof("Bkill(%s)", id)
+       cmd := cli.command("bkill", id)
        buf, err := cmd.CombinedOutput()
        if err == nil || strings.Index(string(buf), "already finished") >= 0 {
                return nil
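diff --git a/lib/lsf/lsfqueue.go b/lib/lsf/lsfqueue.go
index 3c4fc4cb8cf6bc72e4cb1768041cd329e68eded1..971bdd42141cec13cae468fbd5b648edd046c2f6 100644
--- a/lib/lsf/lsfqueue.go
+++ b/lib/lsf/lsfqueue.go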
@@ -26,9 +26,9 @@ type lsfqueue struct {
 // JobID waits for the next queue update (so even a job that was only
 // submitted a nanosecond ago will show up) and then returns the LSF
 // job ID corresponding to the given container UUID.
-func (q *lsfqueue) JobID(uuid string) (int, bool) {
+func (q *lsfqueue) JobID(uuid string) (string, bool) {
        ent, ok := q.getNext()[uuid]
-       return ent.id, ok
+       return ent.ID, ok
 }
 
 // All waits for the next queue update, then returns the names of all
@@ -94,7 +94,7 @@ func (q *lsfqueue) init() {
                        }
                        next := make(map[string]bjobsEntry, len(ents))
                        for _, ent := range ents {
-                               next[ent.name] = ent
+                               next[ent.Name] = ent
                        }
                        // Replace q.latest and notify all the
                        // goroutines that the "next update" they