From 3f8336875d5938afb6b00289e9d6c9941456b57f Mon Sep 17 00:00:00 2001 From: radhika Date: Tue, 7 Feb 2017 17:51:13 -0500 Subject: [PATCH] 10903: Add cancel method with cascade to pipeline_instance and update a job's cancel method to accept a cascade parameter. --- .../controllers/arvados/v1/jobs_controller.rb | 2 +- .../v1/pipeline_instances_controller.rb | 6 + services/api/app/models/job.rb | 26 +++ services/api/app/models/pipeline_instance.rb | 20 ++ services/api/config/routes.rb | 4 +- services/api/test/fixtures/jobs.yml | 175 ++++++++++++++++++ .../api/test/fixtures/pipeline_instances.yml | 39 ++++ .../v1/pipeline_instances_controller_test.rb | 20 ++ services/api/test/unit/job_test.rb | 47 +++++ 9 files changed, 337 insertions(+), 2 deletions(-) diff --git a/services/api/app/controllers/arvados/v1/jobs_controller.rb b/services/api/app/controllers/arvados/v1/jobs_controller.rb index 243f38b78c..c38e419084 100644 --- a/services/api/app/controllers/arvados/v1/jobs_controller.rb +++ b/services/api/app/controllers/arvados/v1/jobs_controller.rb @@ -46,7 +46,7 @@ class Arvados::V1::JobsController < ApplicationController def cancel reload_object_before_update - @object.update_attributes! state: Job::Cancelled + @object.cancel params[:cascade] show end diff --git a/services/api/app/controllers/arvados/v1/pipeline_instances_controller.rb b/services/api/app/controllers/arvados/v1/pipeline_instances_controller.rb index 614af685f9..476fc0abb5 100644 --- a/services/api/app/controllers/arvados/v1/pipeline_instances_controller.rb +++ b/services/api/app/controllers/arvados/v1/pipeline_instances_controller.rb @@ -2,4 +2,10 @@ class Arvados::V1::PipelineInstancesController < ApplicationController accept_attribute_as_json :components, Hash accept_attribute_as_json :properties, Hash accept_attribute_as_json :components_summary, Hash + + def cancel + reload_object_before_update + @object.cancel params[:cascade] + show + end end diff --git a/services/api/app/models/job.rb b/services/api/app/models/job.rb index 2ae7139281..5ce5ff20f2 100644 --- a/services/api/app/models/job.rb +++ b/services/api/app/models/job.rb @@ -284,6 +284,32 @@ class Job < ArvadosModel end end + def cancel cascade=nil + if self.state.in?([Queued, Running]) + self.state = Cancelled + self.save! + elsif self.state != Cancelled + raise InvalidStateTransitionError + end + + return if !cascade + + # cancel all children; they could be jobs or pipeline instances + children = self.components.andand.collect{|_, u| u}.compact + + return if children.empty? + + # cancel any child jobs + Job.where(uuid: children).each do |job| + job.cancel cascade if job.state.in?([Queued, Running]) + end + + # cancel any child pipelines + PipelineInstance.where(uuid: children).each do |pi| + pi.cancel cascade if pi.state.in?([PipelineInstance::RunningOnServer, PipelineInstance::RunningOnClient]) + end + end + protected def self.sorted_hash_digest h diff --git a/services/api/app/models/pipeline_instance.rb b/services/api/app/models/pipeline_instance.rb index f84c4a310f..651f36d30f 100644 --- a/services/api/app/models/pipeline_instance.rb +++ b/services/api/app/models/pipeline_instance.rb @@ -100,6 +100,26 @@ class PipelineInstance < ArvadosModel self.where("state = 'RunningOnServer'") end + def cancel cascade=nil + if self.state.in?([RunningOnServer, RunningOnClient]) + self.state = Paused + self.save! + elsif self.state != Paused + raise InvalidStateTransitionError + end + + return if !cascade + + # cancel all child jobs + children = self.components.andand.collect{|_, c| c['job']}.compact.collect{|j| j['uuid']}.compact + + return if children.empty? + + Job.where(uuid: children).each do |job| + job.cancel cascade if job.state.in?([Job::Queued, Job::Running]) + end + end + protected def bootstrap_components if pipeline_template and (!components or components.empty?) diff --git a/services/api/config/routes.rb b/services/api/config/routes.rb index f89d2c16a8..9cb53fee30 100644 --- a/services/api/config/routes.rb +++ b/services/api/config/routes.rb @@ -54,7 +54,9 @@ Server::Application.routes.draw do resources :nodes do post 'ping', on: :member end - resources :pipeline_instances + resources :pipeline_instances do + post 'cancel', on: :member + end resources :pipeline_templates resources :workflows resources :repositories do diff --git a/services/api/test/fixtures/jobs.yml b/services/api/test/fixtures/jobs.yml index eb0da8b083..809ba0e138 100644 --- a/services/api/test/fixtures/jobs.yml +++ b/services/api/test/fixtures/jobs.yml @@ -557,3 +557,178 @@ running_job_with_components: component1: zzzzz-8i9sb-jyq01m7in1jlofj component2: zzzzz-d1hrv-partdonepipelin script_parameters_digest: 99914b932bd37a50b983c5e7c90ae93b + +# This main level job is in running state with one job and one pipeline instance components +running_job_with_components_at_level_1: + uuid: zzzzz-8i9sb-jobcomponentsl1 + owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz + cancelled_at: ~ + cancelled_by_user_uuid: ~ + cancelled_by_client_uuid: ~ + created_at: <%= 12.hour.ago.to_s(:db) %> + started_at: <%= 12.hour.ago.to_s(:db) %> + finished_at: ~ + repository: active/foo + script: hash + script_version: 1de84a854e2b440dc53bf42f8548afa4c17da332 + script_parameters_digest: 99914b932bd37a50b983c5e7c90ae93b + running: true + success: ~ + output: ~ + priority: 0 + log: ~ + is_locked_by_uuid: zzzzz-tpzed-xurymjxw79nv3jz + tasks_summary: + failed: 0 + todo: 3 + running: 1 + done: 1 + runtime_constraints: {} + state: Running + components: + component1: zzzzz-8i9sb-jobcomponentsl2 + component2: zzzzz-d1hrv-picomponentsl02 + +# This running job, a child of level_1, has one child component +running_job_with_components_at_level_2: + uuid: zzzzz-8i9sb-jobcomponentsl2 + owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz + cancelled_at: ~ + cancelled_by_user_uuid: ~ + cancelled_by_client_uuid: ~ + created_at: <%= 12.hour.ago.to_s(:db) %> + started_at: <%= 12.hour.ago.to_s(:db) %> + finished_at: ~ + repository: active/foo + script: hash + script_version: 1de84a854e2b440dc53bf42f8548afa4c17da332 + script_parameters_digest: 99914b932bd37a50b983c5e7c90ae93b + running: true + success: ~ + output: ~ + priority: 0 + log: ~ + is_locked_by_uuid: zzzzz-tpzed-xurymjxw79nv3jz + tasks_summary: + failed: 0 + todo: 3 + running: 1 + done: 1 + runtime_constraints: {} + state: Running + components: + component1: zzzzz-8i9sb-job1atlevel3noc + +# The below two running jobs, children of level_2, have no child components +running_job_1_with_components_at_level_3: + uuid: zzzzz-8i9sb-job1atlevel3noc + owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz + cancelled_at: ~ + cancelled_by_user_uuid: ~ + cancelled_by_client_uuid: ~ + created_at: <%= 12.hour.ago.to_s(:db) %> + started_at: <%= 12.hour.ago.to_s(:db) %> + finished_at: ~ + repository: active/foo + script: hash + script_version: 1de84a854e2b440dc53bf42f8548afa4c17da332 + script_parameters_digest: 99914b932bd37a50b983c5e7c90ae93b + running: true + success: ~ + output: ~ + priority: 0 + log: ~ + is_locked_by_uuid: zzzzz-tpzed-xurymjxw79nv3jz + tasks_summary: + failed: 0 + todo: 3 + running: 1 + done: 1 + runtime_constraints: {} + state: Running + +running_job_2_with_components_at_level_3: + uuid: zzzzz-8i9sb-job2atlevel3noc + owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz + cancelled_at: ~ + cancelled_by_user_uuid: ~ + cancelled_by_client_uuid: ~ + created_at: <%= 12.hour.ago.to_s(:db) %> + started_at: <%= 12.hour.ago.to_s(:db) %> + finished_at: ~ + repository: active/foo + script: hash + script_version: 1de84a854e2b440dc53bf42f8548afa4c17da332 + script_parameters_digest: 99914b932bd37a50b983c5e7c90ae93b + running: true + success: ~ + output: ~ + priority: 0 + log: ~ + is_locked_by_uuid: zzzzz-tpzed-xurymjxw79nv3jz + tasks_summary: + failed: 0 + todo: 3 + running: 1 + done: 1 + runtime_constraints: {} + state: Running + +# The two jobs below are so confused, they have circular relationship +running_job_1_with_circular_component_relationship: + uuid: zzzzz-8i9sb-job1withcirculr + owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz + cancelled_at: ~ + cancelled_by_user_uuid: ~ + cancelled_by_client_uuid: ~ + created_at: <%= 12.hour.ago.to_s(:db) %> + started_at: <%= 12.hour.ago.to_s(:db) %> + finished_at: ~ + repository: active/foo + script: hash + script_version: 1de84a854e2b440dc53bf42f8548afa4c17da332 + script_parameters_digest: 99914b932bd37a50b983c5e7c90ae93b + running: true + success: ~ + output: ~ + priority: 0 + log: ~ + is_locked_by_uuid: zzzzz-tpzed-xurymjxw79nv3jz + tasks_summary: + failed: 0 + todo: 3 + running: 1 + done: 1 + runtime_constraints: {} + state: Running + components: + component1: zzzzz-8i9sb-job2withcirculr + +running_job_2_with_circular_component_relationship: + uuid: zzzzz-8i9sb-job2withcirculr + owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz + cancelled_at: ~ + cancelled_by_user_uuid: ~ + cancelled_by_client_uuid: ~ + created_at: <%= 12.hour.ago.to_s(:db) %> + started_at: <%= 12.hour.ago.to_s(:db) %> + finished_at: ~ + repository: active/foo + script: hash + script_version: 1de84a854e2b440dc53bf42f8548afa4c17da332 + script_parameters_digest: 99914b932bd37a50b983c5e7c90ae93b + running: true + success: ~ + output: ~ + priority: 0 + log: ~ + is_locked_by_uuid: zzzzz-tpzed-xurymjxw79nv3jz + tasks_summary: + failed: 0 + todo: 3 + running: 1 + done: 1 + runtime_constraints: {} + state: Running + components: + component1: zzzzz-8i9sb-job1withcirculr diff --git a/services/api/test/fixtures/pipeline_instances.yml b/services/api/test/fixtures/pipeline_instances.yml index 34dbe9603b..1d5a45fac8 100644 --- a/services/api/test/fixtures/pipeline_instances.yml +++ b/services/api/test/fixtures/pipeline_instances.yml @@ -420,6 +420,45 @@ failed_pipeline_with_two_jobs: uuid: zzzzz-8i9sb-cjs4pklxxjykqqq log: zzzzz-4zz18-op4e2lbej01tcvu +# This pipeline is a child of another running job and has it's own running children +job_child_pipeline_with_components_at_level_2: + state: RunningOnServer + uuid: zzzzz-d1hrv-picomponentsl02 + owner_uuid: zzzzz-tpzed-xurymjxw79nv3jz + created_at: <%= 12.hour.ago.to_s(:db) %> + started_at: <%= 12.hour.ago.to_s(:db) %> + components: + foo: + script: foo + script_version: master + script_parameters: {} + job: + uuid: zzzzz-8i9sb-job1atlevel3noc + script_version: master + created_at: <%= 12.hour.ago.to_s(:db) %> + started_at: <%= 12.hour.ago.to_s(:db) %> + state: Running + tasks_summary: + failed: 0 + todo: 0 + running: 1 + done: 1 + bar: + script: bar + script_version: master + script_parameters: {} + job: + uuid: zzzzz-8i9sb-job2atlevel3noc + script_version: master + created_at: <%= 12.hour.ago.to_s(:db) %> + started_at: <%= 12.hour.ago.to_s(:db) %> + state: Running + tasks_summary: + failed: 0 + todo: 1 + running: 2 + done: 3 + # Test Helper trims the rest of the file # Do not add your fixtures below this line as the rest of this file will be trimmed by test_helper diff --git a/services/api/test/functional/arvados/v1/pipeline_instances_controller_test.rb b/services/api/test/functional/arvados/v1/pipeline_instances_controller_test.rb index 63c47ff407..933ce7d9c9 100644 --- a/services/api/test/functional/arvados/v1/pipeline_instances_controller_test.rb +++ b/services/api/test/functional/arvados/v1/pipeline_instances_controller_test.rb @@ -25,4 +25,24 @@ class Arvados::V1::PipelineInstancesControllerTest < ActionController::TestCase assert_equal({}, assigns(:object).components) end + [ + true, + false + ].each do |cascade| + test "cancel a pipeline instance with cascade=#{cascade}" do + authorize_with :active + pi_uuid = pipeline_instances(:job_child_pipeline_with_components_at_level_2).uuid + + post :cancel, {id: pi_uuid, cascade: cascade} + assert_response :success + + pi = PipelineInstance.where(uuid: pi_uuid).first + assert_equal "Paused", pi.state + + children = Job.where(uuid: ['zzzzz-8i9sb-job1atlevel3noc', 'zzzzz-8i9sb-job2atlevel3noc']) + children.each do |child| + assert_equal ("Cancelled" == child.state), cascade + end + end + end end diff --git a/services/api/test/unit/job_test.rb b/services/api/test/unit/job_test.rb index 761953e8eb..72b7689817 100644 --- a/services/api/test/unit/job_test.rb +++ b/services/api/test/unit/job_test.rb @@ -502,4 +502,51 @@ class JobTest < ActiveSupport::TestCase update_all(output: 'aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa+1') assert_nil Job.find_reusable(example_attrs, {}, [], [users(:active)]) end + + [ + true, + false, + ].each do |cascade| + test "cancel job with cascade #{cascade}" do + job = Job.find_by_uuid jobs(:running_job_with_components_at_level_1).uuid + job.cancel cascade + assert_equal Job::Cancelled, job.state + + descendents = ['zzzzz-8i9sb-jobcomponentsl2', + 'zzzzz-d1hrv-picomponentsl02', + 'zzzzz-8i9sb-job1atlevel3noc', + 'zzzzz-8i9sb-job2atlevel3noc'] + + jobs = Job.where(uuid: descendents) + jobs.each do |j| + assert_equal ('Cancelled' == j.state), cascade + end + + pipelines = PipelineInstance.where(uuid: descendents) + pipelines.each do |pi| + assert_equal ('Paused' == pi.state), cascade + end + end + end + + test 'cancelling a completed job raises error' do + job = Job.find_by_uuid jobs(:job_with_latest_version).uuid + assert job + assert_equal 'Complete', job.state + + assert_raises(ArvadosModel::InvalidStateTransitionError) do + job.cancel + end + end + + test 'cancelling a job with circular relationship with another does not result in an infinite loop' do + job = Job.find_by_uuid jobs(:running_job_2_with_circular_component_relationship).uuid + + job.cancel true + + assert_equal Job::Cancelled, job.state + + child = Job.find_by_uuid job.components.collect{|_, uuid| uuid}[0] + assert_equal Job::Cancelled, child.state + end end -- 2.39.5