20540: Increase crunch-run API call timeouts to 10m. 20540-crunch-run-retry
author Tom Clegg <tom@curii.com>
Thu, 1 Jun 2023 17:54:41 +0000 (13:54 -0400)
committer Tom Clegg <tom@curii.com>
Thu, 1 Jun 2023 17:54:41 +0000 (13:54 -0400)
Arvados-DCO-1.1-Signed-off-by: Tom Clegg <tom@curii.com>

lib/crunchrun/crunchrun.go

index cab09d11c00652069e88aac6b910cc21b3845590..7d69fb0f977d408e890cd9a21d44f91be17dd132 100644 (file)
@@ -1994,7 +1994,9 @@ func (command) RunCommand(prog string, args []string, stdin io.Reader, stdout, s
                log.Printf("%s: %v", containerUUID, err)
                return 1
        }
-       api.Retries = 8
+       // arvadosclient now interprets Retries=10 to mean
+       // Timeout=10m, retrying with exponential backoff + jitter.
+       api.Retries = 10
 
        kc, err := keepclient.MakeKeepClient(api)
        if err != nil {
@@ -2171,7 +2173,9 @@ func hpcConfData(uuid string, configFile string, stderr io.Writer) ConfigData {
                fmt.Fprintf(stderr, "error setting up arvadosclient: %s\n", err)
                return conf
        }
-       arv.Retries = 8
+       // arvadosclient now interprets Retries=10 to mean
+       // Timeout=10m, retrying with exponential backoff + jitter.
+       arv.Retries = 10
        var ctr arvados.Container
        err = arv.Call("GET", "containers", uuid, "", arvadosclient.Dict{"select": []string{"runtime_constraints"}}, &ctr)
        if err != nil {