X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/ed413bb7071fe0d58a98828e89be21fef5681200..27daf08f38eec505c224e7776678b32d50241e13:/services/api/app/models/job.rb diff --git a/services/api/app/models/job.rb b/services/api/app/models/job.rb index 12f82bee6f..6c24293334 100644 --- a/services/api/app/models/job.rb +++ b/services/api/app/models/job.rb @@ -2,20 +2,25 @@ class Job < ArvadosModel include HasUuid include KindAndEtag include CommonApiTemplate - attr_protected :docker_image_locator + attr_protected :arvados_sdk_version, :docker_image_locator serialize :script_parameters, Hash serialize :runtime_constraints, Hash serialize :tasks_summary, Hash before_create :ensure_unique_submit_id after_commit :trigger_crunch_dispatch_if_cancelled, :on => :update before_validation :set_priority + before_validation :update_state_from_old_state_attrs validate :ensure_script_version_is_commit validate :find_docker_image_locator - before_validation :verify_status - before_create :set_state_before_save - before_save :set_state_before_save + validate :find_arvados_sdk_version + validate :validate_status + validate :validate_state_change + validate :ensure_no_collection_uuids_in_script_params + before_save :tag_version_in_internal_repository + before_save :update_timestamps_when_state_changes has_many :commit_ancestors, :foreign_key => :descendant, :primary_key => :script_version + has_many(:nodes, foreign_key: :job_uuid, primary_key: :uuid) class SubmitIdReused < StandardError end @@ -39,12 +44,13 @@ class Job < ArvadosModel t.add :log t.add :runtime_constraints t.add :tasks_summary - t.add :dependencies t.add :nondeterministic t.add :repository t.add :supplied_script_version + t.add :arvados_sdk_version t.add :docker_image_locator t.add :queue_position + t.add :node_uuids t.add :description end @@ -58,22 +64,23 @@ class Job < ArvadosModel ] def assert_finished - update_attributes(finished_at: finished_at || Time.now, + update_attributes(finished_at: finished_at || db_current_time, success: success.nil? ? false : success, running: false) end + def node_uuids + nodes.map(&:uuid) + end + def self.queue - self.where('started_at is ? and is_locked_by_uuid is ? and cancelled_at is ? and success is ?', - nil, nil, nil, nil). - order('priority desc, created_at') + self.where('state = ?', Queued).order('priority desc, created_at') end def queue_position - i = 0 - Job::queue.each do |j| - if j[:uuid] == self.uuid - return i + Job::queue.each_with_index do |job, index| + if job[:uuid] == self.uuid + return index end end nil @@ -84,6 +91,18 @@ class Job < ArvadosModel order('priority desc, created_at') end + def lock locked_by_uuid + transaction do + self.reload + unless self.state == Queued and self.is_locked_by_uuid.nil? + raise AlreadyLockedError + end + self.state = Running + self.is_locked_by_uuid = locked_by_uuid + self.save! + end + end + protected def foreign_key_attributes @@ -106,21 +125,43 @@ class Job < ArvadosModel end def ensure_script_version_is_commit - if self.is_locked_by_uuid and self.started_at + if state == Running # Apparently client has already decided to go for it. This is # needed to run a local job using a local working directory # instead of a commit-ish. return true end - if new_record? or script_version_changed? - sha1 = Commit.find_commit_range(current_user, self.repository, nil, self.script_version, nil)[0] rescue nil - if sha1 - self.supplied_script_version = self.script_version if self.supplied_script_version.nil? or self.supplied_script_version.empty? - self.script_version = sha1 - else - self.errors.add :script_version, "#{self.script_version} does not resolve to a commit" + if new_record? or repository_changed? or script_version_changed? + sha1 = Commit.find_commit_range(repository, + nil, script_version, nil).first + if not sha1 + errors.add :script_version, "#{script_version} does not resolve to a commit" return false end + if supplied_script_version.nil? or supplied_script_version.empty? + self.supplied_script_version = script_version + end + self.script_version = sha1 + end + true + end + + def tag_version_in_internal_repository + if state == Running + # No point now. See ensure_script_version_is_commit. + true + elsif errors.any? + # Won't be saved, and script_version might not even be valid. + true + elsif new_record? or repository_changed? or script_version_changed? + uuid_was = uuid + begin + assign_uuid + Commit.tag_in_internal_repository repository, script_version, uuid + rescue + uuid = uuid_was + raise + end end end @@ -133,47 +174,50 @@ class Job < ArvadosModel true end - def find_docker_image_locator - # Find the Collection that holds the Docker image specified in the - # runtime constraints, and store its locator in docker_image_locator. - unless runtime_constraints.is_a? Hash - # We're still in validation stage, so we can't assume - # runtime_constraints isn't something horrible like an array or - # a string. Treat those cases as "no docker image supplied"; - # other validations will fail anyway. - self.docker_image_locator = nil - return true + def resolve_runtime_constraint(key, attr_sym) + if ((runtime_constraints.is_a? Hash) and + (search = runtime_constraints[key])) + ok, result = yield search + else + ok, result = true, nil end - image_search = runtime_constraints['docker_image'] - image_tag = runtime_constraints['docker_image_tag'] - if image_search.nil? - self.docker_image_locator = nil - true - elsif coll = Collection.for_latest_docker_image(image_search, image_tag) - self.docker_image_locator = coll.portable_data_hash - true + if ok + send("#{attr_sym}=".to_sym, result) else - errors.add(:docker_image_locator, "not found for #{image_search}") - false + errors.add(attr_sym, result) end + ok end - def dependencies - deps = {} - queue = self.script_parameters.values - while not queue.empty? - queue = queue.flatten.compact.collect do |v| - if v.is_a? Hash - v.values - elsif v.is_a? String - v.match(/^(([0-9a-f]{32})\b(\+[^,]+)?,?)*$/) do |locator| - deps[locator.to_s] = true - end - nil - end + def find_arvados_sdk_version + resolve_runtime_constraint("arvados_sdk_version", + :arvados_sdk_version) do |git_search| + commits = Commit.find_commit_range("arvados", + nil, git_search, nil) + if commits.empty? + [false, "#{git_search} does not resolve to a commit"] + elsif not runtime_constraints["docker_image"] + [false, "cannot be specified without a Docker image constraint"] + else + [true, commits.first] + end + end + end + + def find_docker_image_locator + runtime_constraints['docker_image'] = + Rails.configuration.default_docker_image_for_jobs if ((runtime_constraints.is_a? Hash) and + (runtime_constraints['docker_image']).nil? and + Rails.configuration.default_docker_image_for_jobs) + resolve_runtime_constraint("docker_image", + :docker_image_locator) do |image_search| + image_tag = runtime_constraints['docker_image_tag'] + if coll = Collection.for_latest_docker_image(image_search, image_tag) + [true, coll.portable_data_hash] + else + [false, "not found for #{image_search}"] end end - deps.keys end def permission_to_update @@ -193,7 +237,8 @@ class Job < ArvadosModel success_changed? or output_changed? or log_changed? or - tasks_summary_changed? + tasks_summary_changed? or + state_changed? logger.warn "User #{current_user.uuid if current_user} tried to change protected job attributes on locked #{self.class.to_s} #{uuid_was}" return false end @@ -221,7 +266,7 @@ class Job < ArvadosModel # Ensure cancelled_at cannot be set to arbitrary non-now times, # or changed once it is set. if self.cancelled_at and not self.cancelled_at_was - self.cancelled_at = Time.now + self.cancelled_at = db_current_time self.cancelled_by_user_uuid = current_user.uuid self.cancelled_by_client_uuid = current_api_client.andand.uuid @need_crunch_dispatch_trigger = true @@ -242,99 +287,132 @@ class Job < ArvadosModel end end - def verify_status - changed_attributes = self.changed - - if new_record? - self.state = Queued - elsif 'state'.in? changed_attributes - case self.state - when Queued - self.running = false - self.success = nil - when Running - if !self.is_locked_by_uuid - return false - end - if !self.started_at - self.started_at = Time.now - end - self.running = true - self.success = nil - when Cancelled - if !self.cancelled_at - self.cancelled_at = Time.now - end - self.running = false - self.success = false - when Failed - if !self.finished_at - self.finished_at = Time.now - end - self.running = false - self.success = false - when Complete - if !self.finished_at - self.finished_at = Time.now - end - self.running = false - self.success = true - end - elsif 'cancelled_at'.in? changed_attributes - self.state = Cancelled + def update_timestamps_when_state_changes + return if not (state_changed? or new_record?) + + case state + when Running + self.started_at ||= db_current_time + when Failed, Complete + self.finished_at ||= db_current_time + when Cancelled + self.cancelled_at ||= db_current_time + end + + # TODO: Remove the following case block when old "success" and + # "running" attrs go away. Until then, this ensures we still + # expose correct success/running flags to older clients, even if + # some new clients are writing only the new state attribute. + case state + when Queued + self.running = false + self.success = nil + when Running + self.running = true + self.success = nil + when Cancelled, Failed self.running = false self.success = false - elsif 'success'.in? changed_attributes - if self.cancelled_at - self.state = Cancelled - self.running = false - self.success = false - else - if self.success - self.state = Complete - else - self.state = Failed - end - if !self.finished_at - self.finished_at = Time.now - end - self.running = false - end - elsif 'running'.in? changed_attributes - if self.running - self.state = Running - if !self.started_at - self.started_at = Time.now - end - else - self.state = nil # let set_state_before_save determine what the state should be - self.started_at = nil - end + when Complete + self.running = false + self.success = true end + self.running ||= false # Default to false instead of nil. + + @need_crunch_dispatch_trigger = true + true end - def set_state_before_save - if !self.state - if self.cancelled_at + def update_state_from_old_state_attrs + # If a client has touched the legacy state attrs, update the + # "state" attr to agree with the updated values of the legacy + # attrs. + # + # TODO: Remove this method when old "success" and "running" attrs + # go away. + if cancelled_at_changed? or + success_changed? or + running_changed? or + state.nil? + if cancelled_at self.state = Cancelled - elsif self.success - self.state = Complete - elsif (!self.success.nil? && !self.success) + elsif success == false self.state = Failed - elsif (self.running && self.success.nil? && !self.cancelled_at) + elsif success == true + self.state = Complete + elsif running == true self.state = Running - elsif !self.started_at && !self.cancelled_at && !self.is_locked_by_uuid && self.success.nil? + else self.state = Queued end end - + true + end + + def validate_status if self.state.in?(States) true else - errors.add :state, "'#{state.inspect} must be one of: [#{States.join ', '}]" + errors.add :state, "#{state.inspect} must be one of: #{States.inspect}" false end end + def validate_state_change + ok = true + if self.state_changed? + ok = case self.state_was + when nil + # state isn't set yet + true + when Queued + # Permit going from queued to any state + true + when Running + # From running, may only transition to a finished state + [Complete, Failed, Cancelled].include? self.state + when Complete, Failed, Cancelled + # Once in a finished state, don't permit any more state changes + false + else + # Any other state transition is also invalid + false + end + if not ok + errors.add :state, "invalid change from #{self.state_was} to #{self.state}" + end + end + ok + end + + def ensure_no_collection_uuids_in_script_params + # recursive_hash_search searches recursively through hashes and + # arrays in 'thing' for string fields matching regular expression + # 'pattern'. Returns true if pattern is found, false otherwise. + def recursive_hash_search thing, pattern + if thing.is_a? Hash + thing.each do |k, v| + return true if recursive_hash_search v, pattern + end + elsif thing.is_a? Array + thing.each do |k| + return true if recursive_hash_search k, pattern + end + elsif thing.is_a? String + return true if thing.match pattern + end + false + end + + # Fail validation if any script_parameters field includes a string containing a + # collection uuid pattern. + if self.script_parameters_changed? + if recursive_hash_search(self.script_parameters, Collection.uuid_regex) + self.errors.add :script_parameters, "must use portable_data_hash instead of collection uuid" + return false + end + end + true + end end