X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/fb5d279228a7adcafd858d9e137accd8010bb382..cf1dcfb8715822f7ac5fe4fce067197dd84bad54:/services/api/app/models/job.rb diff --git a/services/api/app/models/job.rb b/services/api/app/models/job.rb index 51f2df4c80..4d63deb99c 100644 --- a/services/api/app/models/job.rb +++ b/services/api/app/models/job.rb @@ -1,3 +1,8 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +require 'log_reuse_info' require 'safe_json' class Job < ArvadosModel @@ -5,8 +10,8 @@ class Job < ArvadosModel include KindAndEtag include CommonApiTemplate extend CurrentApiClient + extend LogReuseInfo serialize :components, Hash - attr_protected :arvados_sdk_version, :docker_image_locator serialize :script_parameters, Hash serialize :runtime_constraints, Hash serialize :tasks_summary, Hash @@ -27,7 +32,7 @@ class Job < ArvadosModel has_many :commit_ancestors, :foreign_key => :descendant, :primary_key => :script_version has_many(:nodes, foreign_key: :job_uuid, primary_key: :uuid) - class SubmitIdReused < StandardError + class SubmitIdReused < RequestError end api_accessible :user, extend: :common do |t| @@ -77,6 +82,10 @@ class Job < ArvadosModel ["components"] end + def self.protected_attributes + [:arvados_sdk_version, :docker_image_locator] + end + def assert_finished update_attributes(finished_at: finished_at || db_current_time, success: success.nil? ? false : success, @@ -198,16 +207,6 @@ class Job < ArvadosModel filters end - # reuselog logs whatever the given block returns, if reuse logging - # is enabled. It accepts a block instead of a string because in some - # cases constructing the strings involves doing database queries, - # and we want to skip those queries when logging is disabled. - def self.reuselog - if Rails.configuration.log_reuse_decisions - Rails.logger.info("find_reusable: " + yield) - end - end - def self.find_reusable attrs, params, filters, read_users if filters.empty? # Translate older creation parameters into filters. filters = @@ -247,76 +246,96 @@ class Job < ArvadosModel end # Search for a reusable Job, and return it if found. - candidates = Job.readable_by(current_user).where( + candidates = Job.readable_by(current_user) + log_reuse_info { "starting with #{candidates.count} jobs readable by current user #{current_user.uuid}" } + + candidates = candidates.where( 'state = ? or (owner_uuid = ? and state in (?))', Job::Complete, current_user.uuid, [Job::Queued, Job::Running]) - reuselog { "have #{candidates.count} readable jobs in a reusable state" } + log_reuse_info(candidates) { "after filtering on job state ((state=Complete) or (state=Queued/Running and (submitted by current user)))" } digest = Job.sorted_hash_digest(attrs[:script_parameters]) candidates = candidates.where('script_parameters_digest = ?', digest) - reuselog { "have #{candidates.count} candidates after filtering on script_parameters_digest #{digest}" } + log_reuse_info(candidates) { "after filtering on script_parameters_digest #{digest}" } candidates = candidates.where('nondeterministic is distinct from ?', true) - reuselog { "have #{candidates.count} candidates after filtering on !nondeterministic" } + log_reuse_info(candidates) { "after filtering on !nondeterministic" } # prefer Running jobs over Queued candidates = candidates.order('state desc, created_at') candidates = apply_filters candidates, filters - reuselog { "have #{candidates.count} candidates after filtering on repo, script, and custom filters #{filters.inspect}" } + log_reuse_info(candidates) { "after filtering on repo, script, and custom filters #{filters.inspect}" } chosen = nil + chosen_output = nil incomplete_job = nil candidates.each do |j| if j.state != Job::Complete if !incomplete_job # We'll use this if we don't find a job that has completed - reuselog { "job #{j.uuid} is reusable, but unfinished; continuing search for completed jobs" } + log_reuse_info { "job #{j.uuid} is reusable, but unfinished; continuing search for completed jobs" } incomplete_job = j else - reuselog { "job #{j.uuid} is reusable, but unfinished, so not better than #{incomplete_job.uuid}; ignoring" } + log_reuse_info { "job #{j.uuid} is unfinished and we already have #{incomplete_job.uuid}; ignoring" } end elsif chosen == false # Ignore: we have already decided not to reuse any completed # job. - reuselog { "job #{j.uuid} output #{j.output} ignored, see above" } - elsif Rails.configuration.reuse_job_if_outputs_differ - if Collection.readable_by(current_user).find_by_portable_data_hash(j.output) - reuselog { "job #{j.uuid} with output #{j.output} is reusable; decision is final." } - return j - else - # Ignore: keep locking for an incomplete job or one whose + log_reuse_info { "job #{j.uuid} with output #{j.output} ignored, see above" } + elsif j.output.nil? + log_reuse_info { "job #{j.uuid} has nil output" } + elsif j.log.nil? + log_reuse_info { "job #{j.uuid} has nil log" } + elsif Rails.configuration.Containers.JobsAPI.ReuseJobIfOutputsDiffer + if !Collection.readable_by(current_user).find_by_portable_data_hash(j.output) + # Ignore: keep looking for an incomplete job or one whose # output is readable. - reuselog { "job #{j.uuid} output #{j.output} unavailable to user; continuing search" } + log_reuse_info { "job #{j.uuid} output #{j.output} unavailable to user; continuing search" } + elsif !Collection.readable_by(current_user).find_by_portable_data_hash(j.log) + # Ignore: keep looking for an incomplete job or one whose + # log is readable. + log_reuse_info { "job #{j.uuid} log #{j.log} unavailable to user; continuing search" } + else + log_reuse_info { "job #{j.uuid} with output #{j.output} is reusable; decision is final." } + return j end - elsif chosen - if chosen.output != j.output + elsif chosen_output + if chosen_output != j.output # If two matching jobs produced different outputs, run a new # job (or use one that's already running/queued) instead of # choosing one arbitrarily. - reuselog { "job #{j.uuid} output #{j.output} disagrees; forgetting about #{chosen.uuid} and ignoring any other finished jobs (see reuse_job_if_outputs_differ in application.default.yml)" } + log_reuse_info { "job #{j.uuid} output #{j.output} disagrees; forgetting about #{chosen.uuid} and ignoring any other finished jobs (see reuse_job_if_outputs_differ in application.default.yml)" } chosen = false else - reuselog { "job #{j.uuid} output #{j.output} agrees with chosen #{chosen.uuid}; continuing search in case others disagree" } + log_reuse_info { "job #{j.uuid} output #{j.output} agrees with chosen #{chosen.uuid}; continuing search in case other candidates have different outputs" } end # ...and that's the only thing we need to do once we've chosen # a job to reuse. elsif !Collection.readable_by(current_user).find_by_portable_data_hash(j.output) - # As soon as the output we will end up returning (if any) is - # decided, check whether it will be visible to the user; if - # not, any further investigation of reusable jobs is futile. - reuselog { "job #{j.uuid} output #{j.output} unavailable to user; now refusing to reuse any finished job" } + # This user cannot read the output of this job. Any other + # completed job will have either the same output (making it + # unusable) or a different output (making it unusable because + # reuse_job_if_outputs_different is turned off). Therefore, + # any further investigation of reusable jobs is futile. + log_reuse_info { "job #{j.uuid} output #{j.output} is unavailable to user; this means no finished job can be reused (see reuse_job_if_outputs_differ in application.default.yml)" } chosen = false + elsif !Collection.readable_by(current_user).find_by_portable_data_hash(j.log) + # This user cannot read the log of this job, don't try to reuse the + # job but consider if the output is consistent. + log_reuse_info { "job #{j.uuid} log #{j.log} is unavailable to user; continuing search" } + chosen_output = j.output else - reuselog { "job #{j.uuid} output #{j.output} can be reused; continuing search in case others disagree" } + log_reuse_info { "job #{j.uuid} with output #{j.output} can be reused; continuing search in case other candidates have different outputs" } chosen = j + chosen_output = j.output end end j = chosen || incomplete_job if j - reuselog { "done, #{j.uuid} was selected" } + log_reuse_info { "done, #{j.uuid} was selected" } else - reuselog { "done, nothing suitable" } + log_reuse_info { "done, nothing suitable" } end return j end @@ -472,9 +491,9 @@ class Job < ArvadosModel end def find_docker_image_locator - if runtime_constraints.is_a? Hash + if runtime_constraints.is_a? Hash and Rails.configuration.Containers.JobsAPI.DefaultDockerImage != "" runtime_constraints['docker_image'] ||= - Rails.configuration.default_docker_image_for_jobs + Rails.configuration.Containers.JobsAPI.DefaultDockerImage end resolve_runtime_constraint("docker_image", @@ -550,7 +569,7 @@ class Job < ArvadosModel def trigger_crunch_dispatch_if_cancelled if @need_crunch_dispatch_trigger - File.open(Rails.configuration.crunch_refresh_trigger, 'wb') do + File.open(Rails.configuration.Containers.JobsAPI.CrunchRefreshTrigger, 'wb') do # That's all, just create/touch a file for crunch-job to see. end end