From 19ad5c59b064088c58136f5387fdf029b754ee36 Mon Sep 17 00:00:00 2001
From: Peter Amstutz
Date: Mon, 12 Sep 2016 15:17:23 -0400
Subject: [PATCH] 10004: Add check_sinfo() to end srun_sync() if any of the allocated nodes are no longer "alloc" according to sinfo.

---
 sdk/cli/bin/crunch-job | 32 +++++++++++++++++++++++++++++++-
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/sdk/cli/bin/crunch-job b/sdk/cli/bin/crunch-job
index 39238b0fc6..48f9669fee 100755
--- a/sdk/cli/bin/crunch-job
+++ b/sdk/cli/bin/crunch-job
@@ -355,6 +355,7 @@ my @jobstep_done = ();
 my @jobstep_tomerge = ();
 my $jobstep_tomerge_level = 0;
 my $squeue_checked = 0;
+my $sinfo_checked = 0;
 my $latest_refresh = scalar time;
 
 
@@ -1401,6 +1402,35 @@ sub check_squeue
   }
 }
 
+sub check_sinfo
+{
+  # Ask sinfo for the state of the nodes allocated to this job and set
+  # $main::please_freeze if any reported state is not "alloc"/"alloc*".
+
+  my $last_sinfo_check = $sinfo_checked;
+
+  # Do not call `sinfo` more than once every 15 seconds.
+  return if $last_sinfo_check > time - 15;
+  $sinfo_checked = time;
+
+  # "%t" selects the compact node state; --noheader leaves only states.
+  my @sinfo = `sinfo --nodes=\Q$ENV{SLURM_NODELIST}\E --noheader -o "%t"`;
+  if ($? != 0)
+  {
+    Log(undef, "warning: sinfo exit status $? ($!)");
+    return;
+  }
+  chomp @sinfo;
+
+  foreach (@sinfo)
+  {
+    # States are strings, so compare with "ne": numeric "!=" turns every
+    # state name into 0 and the test could never be true.
+    if ($_ ne "alloc" && $_ ne "alloc*") {
+      $main::please_freeze = 1;
+    }
+  }
+}
 
 sub release_allocation
 {
@@ -1906,7 +1936,6 @@ sub freezeunquote
   return $s;
 }
 
-
 sub srun_sync
 {
   my $srunargs = shift;
@@ -1961,6 +1990,7 @@ sub srun_sync
     if (!$busy || ($latest_refresh + 2 < scalar time)) {
       check_refresh_wanted();
       check_squeue();
+      check_sinfo();
     }
     if (!$busy) {
       select(undef, undef, undef, 0.1);
-- 
2.30.2