if err != nil {
return nil, fmt.Errorf("creating temp dir: %v", err)
}
- err = gitMount(mnt).extractTree(runner.ContainerArvClient, tmpdir, token)
+ err = gitMount(mnt).extractTree(runner.containerClient, tmpdir, token)
if err != nil {
return nil, err
}
txt, err := (&copier{
client: runner.containerClient,
- arvClient: runner.ContainerArvClient,
keepClient: runner.ContainerKeepClient,
hostOutputDir: runner.HostOutputDir,
ctrOutputDir: runner.Container.OutputPath,
if final {
updates["is_trashed"] = true
} else {
- exp := time.Now().Add(crunchLogUpdatePeriod * 24)
+ // We set trash_at so this collection gets
+ // automatically cleaned up eventually. It used to be
+ // 12 hours but we had a situation where the API
+ // server was down over a weekend but the containers
+ // kept running such that the log collection got
+ // trashed, so now we make it 2 weeks. refs #20378
+ exp := time.Now().Add(time.Duration(24*14) * time.Hour)
updates["trash_at"] = exp
updates["delete_at"] = exp
}
signal.Notify(sigusr2, syscall.SIGUSR2)
defer signal.Stop(sigusr2)
runner.loadPrices()
- go func() {
- for range sigusr2 {
- runner.loadPrices()
- }
- }()
+ go runner.handleSIGUSR2(sigusr2)
runner.finalState = "Queued"
log.Printf("%s: %v", containerUUID, err)
return 1
}
- api.Retries = 8
+ // arvadosclient now interprets Retries=10 to mean
+ // Timeout=10m, retrying with exponential backoff + jitter.
+ api.Retries = 10
kc, err := keepclient.MakeKeepClient(api)
if err != nil {
ContainerUUID: containerUUID,
Target: cr.executor,
Log: cr.CrunchLog,
+ LogCollection: cr.LogCollection,
}
if gwListen == "" {
// Direct connection won't work, so we use the
fmt.Fprintf(stderr, "error setting up arvadosclient: %s\n", err)
return conf
}
- arv.Retries = 8
+ // arvadosclient now interprets Retries=10 to mean
+ // Timeout=10m, retrying with exponential backoff + jitter.
+ arv.Retries = 10
var ctr arvados.Container
err = arv.Call("GET", "containers", uuid, "", arvadosclient.Dict{"select": []string{"runtime_constraints"}}, &ctr)
if err != nil {
return cost
}
+
+// handleSIGUSR2 services signals delivered on sigchan. For each
+// signal received it reloads pricing data via loadPrices, then
+// sends the container's recalculated cost (as of the current time)
+// to the API through DispatcherArvClient.
+//
+// The loop ends only when sigchan is closed, so this is meant to be
+// run in its own goroutine.
+func (runner *ContainerRunner) handleSIGUSR2(sigchan chan os.Signal) {
+	for range sigchan {
+		runner.loadPrices()
+		update := arvadosclient.Dict{
+			"container": arvadosclient.Dict{
+				"cost": runner.calculateCost(time.Now()),
+			},
+		}
+		err := runner.DispatcherArvClient.Update("containers", runner.Container.UUID, update, nil)
+		if err != nil {
+			// The cost update is best effort: report the
+			// failure instead of dropping it silently, and
+			// keep waiting for the next signal.
+			log.Printf("%s: error updating container cost: %v", runner.Container.UUID, err)
+		}
+	}
+}