(to something other than nil).
Also:
* Add test fixtures for jobs
* Take care to update the Job object piecemeal during crunch-job
(avoid overwriting cancelled_at)
=item SIGHUP
Refresh node allocation (i.e., check whether any nodes have been added
-or unallocated). Currently this is a no-op.
+or unallocated) and attributes of the Job record that should affect
+behavior (e.g., cancel job if cancelled_at becomes non-nil).
=back
my $local_job = !$job_has_uuid;
-$SIG{'HUP'} = sub
-{
- 1;
-};
$SIG{'USR1'} = sub
{
$main::ENV{CRUNCH_DEBUG} = 1;
if ($job_has_uuid)
{
# Claim this job, and make sure nobody else does
-
- $Job->{'is_locked_by_uuid'} = $User->{'uuid'};
- $Job->{'started_at'} = gmtime;
- $Job->{'running'} = 1;
- $Job->{'success'} = undef;
- $Job->{'tasks_summary'} = { 'failed' => 0,
- 'todo' => 1,
- 'running' => 0,
- 'done' => 0 };
- if ($job_has_uuid) {
- unless ($Job->save() && $Job->{'is_locked_by_uuid'} == $User->{'uuid'}) {
- croak("Error while updating / locking job");
- }
+ unless ($Job->update_attributes('is_locked_by_uuid' => $User->{'uuid'}) &&
+ $Job->{'is_locked_by_uuid'} == $User->{'uuid'}) {
+ croak("Error while updating / locking job");
}
+ $Job->update_attributes('started_at' => gmtime,
+ 'running' => 1,
+ 'success' => undef,
+ 'tasks_summary' => { 'failed' => 0,
+ 'todo' => 1,
+ 'running' => 0,
+ 'done' => 0 });
}
$SIG{'TSTP'} = sub { $main::please_freeze = 1; };
$SIG{'ALRM'} = sub { $main::please_info = 1; };
$SIG{'CONT'} = sub { $main::please_continue = 1; };
+$SIG{'HUP'} = sub { $main::please_refresh = 1; };
+
$main::please_freeze = 0;
$main::please_info = 0;
$main::please_continue = 0;
+$main::please_refresh = 0;
my $jobsteps_must_output_keys = 0; # becomes 1 when any task outputs a key
grep { $ENV{$1} = $2 if /^(NOCACHE.*?)=(.*)/ } split ("\n", $$Job{knobs});
Log (undef, "Using commit $commit for tree-ish $treeish");
if ($commit ne $treeish) {
$Job->{'script_version'} = $commit;
- !$job_has_uuid or $Job->save() or croak("Error while updating job");
+ !$job_has_uuid or
+ $Job->update_attributes('script_version' => $commit) or
+ croak("Error while updating job");
}
}
}
(@slot > @freeslot && $todo_ptr+1 > $#jobstep_todo))
{
last THISROUND if $main::please_freeze;
+ if ($main::please_refresh)
+ {
+ $main::please_refresh = 0;
+ if ($job_has_uuid) {
+ $Job2 = $arv->{'jobs'}->{'get'}->execute('uuid' => $jobspec);
+ for my $attr ('cancelled_at',
+ 'cancelled_by_user_uuid',
+ 'cancelled_by_client_uuid') {
+ $Job->{$attr} = $Job2->{$attr};
+ }
+ if ($Job->{'cancelled_at'}) {
+ Log (undef, "Job cancelled at " . $Job->{cancelled_at} .
+ " by user " . $Job->{cancelled_by_user_uuid});
+ $main::success = 0;
+ $main::please_freeze = 1;
+ }
+ }
+ }
if ($main::please_info)
{
$main::please_info = 0;
release_allocation();
freeze();
-$Job->reload;
-$Job->{'output'} = &collate_output();
-$Job->{'running'} = 0;
-$Job->{'success'} = $Job->{'output'} && $main::success;
-$Job->{'finished_at'} = gmtime;
-$Job->save if $job_has_uuid;
+if ($job_has_uuid) {
+ $Job->update_attributes('output' => &collate_output(),
+ 'running' => 0,
+ 'success' => $Job->{'output'} && $main::success,
+ 'finished_at' => gmtime)
+}
if ($Job->{'output'})
{
$Job->{'tasks_summary'}->{'todo'} = $todo;
$Job->{'tasks_summary'}->{'done'} = $done;
$Job->{'tasks_summary'}->{'running'} = $running;
- $Job->save if $job_has_uuid;
+ if ($job_has_uuid) {
+ $Job->update_attributes('tasks_summary' => $Job->{'tasks_summary'});
+ }
Log (undef, "status: $done done, $running running, $todo todo");
$progress_is_dirty = 0;
}
if ($joboutput)
{
Log (undef, "output $joboutput");
- $Job->{'output'} = $joboutput;
- $Job->save if $job_has_uuid;
+ $Job->update_attributes('output' => $joboutput) if $job_has_uuid;
}
else
{
sub cleanup
{
return if !$job_has_uuid;
- $Job->reload;
- $Job->{'running'} = 0;
- $Job->{'success'} = 0;
- $Job->{'finished_at'} = gmtime;
- $Job->save;
+ $Job->update_attributes('running' => 0,
+ 'success' => 0,
+ 'finished_at' => gmtime);
}
undef $metastream if !$justcheckpoint; # otherwise Log() will try to use it
Log (undef, "meta key is $loglocator");
$Job->{'log'} = $loglocator;
- $Job->save if $job_has_uuid;
+ $Job->update_attributes('log', $loglocator) if $job_has_uuid;
}
$self;
}
+sub update_attributes
+{
+ my $self = shift;
+ my %updates = @_;
+ $response = $self->{'resourceAccessor'}->{'update'}->execute('uuid' => $self->{'uuid'}, $self->resource_parameter_name() => \%updates);
+ foreach my $param (keys %updates) {
+ if (exists $response->{$param}) {
+ $self->{$param} = $response->{$param};
+ }
+ }
+ $self;
+}
+
sub reload
{
my $self = shift;
gem 'andand'
gem 'redis'
+
+gem 'test_after_commit', :group => :test
multi_json (~> 1.0)
rack (~> 1.0)
tilt (~> 1.1, != 1.3.0)
+ test_after_commit (0.2.2)
therubyracer (0.12.0)
libv8 (~> 3.16.14.0)
ref
redis
rvm-capistrano
sass-rails (>= 3.2.0)
+ test_after_commit
therubyracer
uglifier (>= 1.0.3)
before_create :ensure_unique_submit_id
before_create :ensure_script_version_is_commit
before_update :ensure_script_version_is_commit
+ after_commit :trigger_crunch_dispatch_if_cancelled, :on => :update
has_many :commit_ancestors, :foreign_key => :descendant, :primary_key => :script_version
if script_changed? or
script_parameters_changed? or
script_version_changed? or
- cancelled_by_client_changed? or
- cancelled_by_user_changed? or
- cancelled_at_changed? or
+ (!cancelled_at_was.nil? and
+ (cancelled_by_client_changed? or
+ cancelled_by_user_changed? or
+ cancelled_at_changed?)) or
started_at_changed? or
finished_at_changed? or
running_changed? or
end
end
end
+
+ def update_modified_by_fields
+ if self.cancelled_at_changed?
+ # Ensure cancelled_at cannot be set to arbitrary non-now times,
+ # or changed once it is set.
+ if self.cancelled_at and not self.cancelled_at_was
+ self.cancelled_at = Time.now
+ self.cancelled_by_user_uuid = current_user.uuid
+ self.cancelled_by_client_uuid = current_api_client.uuid
+ @need_crunch_dispatch_trigger = true
+ else
+ self.cancelled_at = self.cancelled_at_was
+ self.cancelled_by_user_uuid = self.cancelled_by_user_uuid_was
+ self.cancelled_by_client_uuid = self.cancelled_by_client_uuid_was
+ end
+ end
+ super
+ end
+
+ def trigger_crunch_dispatch_if_cancelled
+ if @need_crunch_dispatch_trigger
+ File.open(Rails.configuration.crunch_dispatch_hup_trigger, 'wb') do
+ # That's all, just create a file for crunch-dispatch to see.
+ end
+ end
+ end
+
end
config.crunch_job_wrapper = :none
config.crunch_job_user = 'crunch' # if false, do not set uid when running jobs
+ # The web service must be able to create this file, and
+ # crunch_dispatch.rb must be able to unlink it.
+ config.crunch_dispatch_hup_trigger = '/tmp/crunch_dispatch_hup_trigger'
+
# config.dnsmasq_conf_dir = '/etc/dnsmasq.d'
# config.compute_node_ami = 'ami-cbca41a2'
config.crunch_job_wrapper = :slurm_immediate
config.crunch_job_user = 'crunch' # if false, do not set uid when running jobs
+ # The web service must be able to create this file, and
+ # crunch_dispatch.rb must be able to unlink it.
+ config.crunch_dispatch_hup_trigger = '/tmp/crunch_dispatch_hup_trigger'
+
# config.dnsmasq_conf_dir = '/etc/dnsmasq.d'
# config.compute_node_ami = 'ami-cbca41a2'
config.crunch_job_wrapper = :slurm_immediate
config.crunch_job_user = 'crunch' # if false, do not set uid when running jobs
+ # The web service must be able to create this file, and
+ # crunch_dispatch.rb must be able to unlink it.
+ config.crunch_dispatch_hup_trigger = '/tmp/crunch_dispatch_hup_trigger_test'
+
# config.dnsmasq_conf_dir = '/etc/dnsmasq.d'
# config.compute_node_ami = 'ami-cbca41a2'
include Process
+$warned = {}
$signal = {}
%w{TERM INT}.each do |sig|
signame = sig
$signal[:term] = true
end
end
+Signal.trap('HUP') do
+ $stderr.puts "Received HUP signal"
+ $signal[:hup] = true
+end
if ENV["CRUNCH_DISPATCH_LOCKFILE"]
lockfilename = ENV.delete "CRUNCH_DISPATCH_LOCKFILE"
end
end
else
+ if File.exists?(Rails.configuration.crunch_dispatch_hup_trigger)
+ begin
+ File.unlink(Rails.configuration.crunch_dispatch_hup_trigger)
+ $signal[:hup] = true
+ rescue Errno::ENOENT
+ $stderr.puts "Weird, hup_trigger file was deleted by someone else."
+ rescue Errno::EPERM
+ if not $warned[:hup_trigger_perm]
+ $warned[:hup_trigger_perm] = true
+ $stderr.puts "Install problem: I see the hup_trigger file but cannot delete it."
+ end
+ end
+ end
+ if $signal[:hup]
+ # Pass HUP through to all crunch-job processes.
+ @running.each do |uuid, j|
+ begin
+ Process.kill 'HUP', j[:wait_thr].pid
+ rescue Errno::ESRCH
+ # Process ended but hasn't been reaped. Nothing to do.
+ end
+ end
+ $signal.delete :hup
+ end
refresh_todo unless did_recently(:refresh_todo, 1.0)
update_node_status
start_jobs unless @todo.empty? or did_recently(:start_jobs, 1.0)
# Read about fixtures at http://api.rubyonrails.org/classes/ActiveRecord/Fixtures.html
trusted_workbench:
+ uuid: zzzzz-ozdt8-teyxzyd8qllg11h
name: Official Workbench
url_prefix: https://official-workbench.local/
is_trusted: true
untrusted:
+ uuid: zzzzz-ozdt8-obw7foaks3qjyej
name: Untrusted
url_prefix: https://untrusted.local/
is_trusted: false
--- /dev/null
+running:
+ uuid: zzzzz-8i9sb-pshmckwoma9plh7
+ owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+ cancelled_at: ~
+ cancelled_by_user_uuid: ~
+ cancelled_by_client_uuid: ~
+ started_at: <%= 3.minute.ago.to_s(:db) %>
+ finished_at: ~
+ running: true
+ success: ~
+ output: ~
+ priority: ~
+ log: ~
+ is_locked_by_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+ tasks_summary:
+ failed: 0
+ todo: 3
+ running: 1
+ done: 1
+ runtime_constraints: {}
+
+running_cancelled:
+ uuid: zzzzz-8i9sb-4cf0nhn6xte809j
+ owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+ cancelled_at: <%= 1.minute.ago.to_s(:db) %>
+ cancelled_by_user_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+ cancelled_by_client_uuid: zzzzz-ozdt8-obw7foaks3qjyej
+ started_at: <%= 3.minute.ago.to_s(:db) %>
+ finished_at: ~
+ running: true
+ success: ~
+ output: ~
+ priority: ~
+ log: ~
+ is_locked_by_uuid: zzzzz-tpzed-xurymjxw79nv3jz
+ tasks_summary:
+ failed: 0
+ todo: 3
+ running: 1
+ done: 1
+ runtime_constraints: {}
require 'test_helper'
class Arvados::V1::JobsControllerTest < ActionController::TestCase
+
+ test "submit a job" do
+ authorize_with :active
+ post :create, job: {
+ script: "hash",
+ script_version: "master",
+ script_parameters: {}
+ }
+ assert_response :success
+ assert_not_nil assigns(:object)
+ new_job = JSON.parse(@response.body)
+ assert_not_nil new_job['uuid']
+ end
+
+ test "cancel a running job" do
+ # We need to verify that "cancel" creates a trigger file, so first
+ # let's make sure there is no stale trigger file.
+ begin
+ File.unlink(Rails.configuration.crunch_dispatch_hup_trigger)
+ rescue Errno::ENOENT
+ end
+
+ authorize_with :active
+ put :update, {
+ id: jobs(:running).uuid,
+ job: {
+ cancelled_at: 4.day.ago
+ }
+ }
+ assert_response :success
+ assert_not_nil assigns(:object)
+ job = JSON.parse(@response.body)
+ assert_not_nil job['uuid']
+ assert_not_nil job['cancelled_at']
+ assert_not_nil job['cancelled_by_user_uuid']
+ assert_not_nil job['cancelled_by_client_uuid']
+ assert_equal(true, job['cancelled_at'] > 1.minute.ago,
+ 'bogus cancelled_at corrected by server')
+ assert_equal(true,
+ File.exists?(Rails.configuration.crunch_dispatch_hup_trigger),
+ 'trigger file should be created when job is cancelled')
+
+ put :update, {
+ id: jobs(:running).uuid,
+ job: {
+ cancelled_at: nil
+ }
+ }
+ job = JSON.parse(@response.body)
+ assert_not_nil job['cancelled_at'], 'un-cancelled job stays cancelled'
+ end
+
end