refs #4926 Merge branch '4926-getting-started'
[arvados.git] / services / api / app / models / job.rb
index 12f82bee6ffc7cf6ea41396eca7a1e0682f9d757..0444528b6bda671a4327ef39fff1918fabebf9d8 100644 (file)
@@ -2,20 +2,24 @@ class Job < ArvadosModel
   include HasUuid
   include KindAndEtag
   include CommonApiTemplate
-  attr_protected :docker_image_locator
+  attr_protected :arvados_sdk_version, :docker_image_locator
   serialize :script_parameters, Hash
   serialize :runtime_constraints, Hash
   serialize :tasks_summary, Hash
   before_create :ensure_unique_submit_id
   after_commit :trigger_crunch_dispatch_if_cancelled, :on => :update
   before_validation :set_priority
+  before_validation :update_state_from_old_state_attrs
   validate :ensure_script_version_is_commit
+  validate :find_arvados_sdk_version
   validate :find_docker_image_locator
-  before_validation :verify_status
-  before_create :set_state_before_save
-  before_save :set_state_before_save
+  validate :validate_status
+  validate :validate_state_change
+  validate :ensure_no_collection_uuids_in_script_params
+  before_save :update_timestamps_when_state_changes
 
   has_many :commit_ancestors, :foreign_key => :descendant, :primary_key => :script_version
+  has_many(:nodes, foreign_key: :job_uuid, primary_key: :uuid)
 
   class SubmitIdReused < StandardError
   end
@@ -43,8 +47,10 @@ class Job < ArvadosModel
     t.add :nondeterministic
     t.add :repository
     t.add :supplied_script_version
+    t.add :arvados_sdk_version
     t.add :docker_image_locator
     t.add :queue_position
+    t.add :node_uuids
     t.add :description
   end
 
@@ -63,17 +69,18 @@ class Job < ArvadosModel
                       running: false)
   end
 
+  def node_uuids
+    nodes.map(&:uuid)
+  end
+
   def self.queue
-    self.where('started_at is ? and is_locked_by_uuid is ? and cancelled_at is ? and success is ?',
-               nil, nil, nil, nil).
-      order('priority desc, created_at')
+    # Jobs whose state is Queued, highest priority first; jobs with
+    # equal priority are ordered by ascending created_at.
+    self.where('state = ?', Queued).order('priority desc, created_at')
   end
 
   def queue_position
-    i = 0
-    Job::queue.each do |j|
-      if j[:uuid] == self.uuid
-        return i
+    Job::queue.each_with_index do |job, index|
+      if job[:uuid] == self.uuid
+        return index
       end
     end
     nil
@@ -84,6 +91,18 @@ class Job < ArvadosModel
       order('priority desc, created_at')
   end
 
+  def lock locked_by_uuid
+    transaction do
+      self.reload
+      unless self.state == Queued and self.is_locked_by_uuid.nil?
+        raise AlreadyLockedError
+      end
+      self.state = Running
+      self.is_locked_by_uuid = locked_by_uuid
+      self.save!
+    end
+  end
+
   protected
 
   def foreign_key_attributes
@@ -106,7 +125,7 @@ class Job < ArvadosModel
   end
 
   def ensure_script_version_is_commit
-    if self.is_locked_by_uuid and self.started_at
+    if self.state == Running
       # Apparently client has already decided to go for it. This is
       # needed to run a local job using a local working directory
       # instead of a commit-ish.
@@ -133,28 +152,45 @@ class Job < ArvadosModel
     true
   end
 
-  def find_docker_image_locator
-    # Find the Collection that holds the Docker image specified in the
-    # runtime constraints, and store its locator in docker_image_locator.
-    unless runtime_constraints.is_a? Hash
-      # We're still in validation stage, so we can't assume
-      # runtime_constraints isn't something horrible like an array or
-      # a string. Treat those cases as "no docker image supplied";
-      # other validations will fail anyway.
-      self.docker_image_locator = nil
-      return true
+  # Shared helper for validations that turn a runtime constraint into
+  # a model attribute.
+  #
+  # If runtime_constraints is a Hash with a truthy value at +key+,
+  # that value is yielded to the block, which must return a pair
+  # [ok, result]. Otherwise the pair defaults to [true, nil].
+  #
+  # When ok is truthy, result is assigned to the attribute named by
+  # +attr_sym+; otherwise result is added as an error message on
+  # +attr_sym+. Returns ok.
+  def resolve_runtime_constraint(key, attr_sym)
+    if ((runtime_constraints.is_a? Hash) and
+        (search = runtime_constraints[key]))
+      ok, result = yield search
+    else
+      ok, result = true, nil
     end
-    image_search = runtime_constraints['docker_image']
-    image_tag = runtime_constraints['docker_image_tag']
-    if image_search.nil?
-      self.docker_image_locator = nil
-      true
-    elsif coll = Collection.for_latest_docker_image(image_search, image_tag)
-      self.docker_image_locator = coll.portable_data_hash
-      true
+    if ok
+      send("#{attr_sym}=".to_sym, result)
     else
-      errors.add(:docker_image_locator, "not found for #{image_search}")
-      false
+      errors.add(attr_sym, result)
+    end
+    ok
+  end
+
+  def find_arvados_sdk_version
+    resolve_runtime_constraint("arvados_sdk_version",
+                               :arvados_sdk_version) do |git_search|
+      commits = Commit.find_commit_range(current_user, "arvados",
+                                         nil, git_search, nil)
+      if commits.nil? or commits.empty?
+        [false, "#{git_search} does not resolve to a commit"]
+      elsif not runtime_constraints["docker_image"]
+        [false, "cannot be specified without a Docker image constraint"]
+      else
+        [true, commits.first]
+      end
+    end
+  end
+
+  def find_docker_image_locator
+    resolve_runtime_constraint("docker_image",
+                               :docker_image_locator) do |image_search|
+      image_tag = runtime_constraints['docker_image_tag']
+      if coll = Collection.for_latest_docker_image(image_search, image_tag)
+        [true, coll.portable_data_hash]
+      else
+        [false, "not found for #{image_search}"]
+      end
     end
   end
 
@@ -193,7 +229,8 @@ class Job < ArvadosModel
           success_changed? or
           output_changed? or
           log_changed? or
-          tasks_summary_changed?
+          tasks_summary_changed? or
+          state_changed?
         logger.warn "User #{current_user.uuid if current_user} tried to change protected job attributes on locked #{self.class.to_s} #{uuid_was}"
         return false
       end
@@ -242,99 +279,132 @@ class Job < ArvadosModel
     end
   end
 
-  def verify_status
-    changed_attributes = self.changed
-
-    if new_record?
-      self.state = Queued
-    elsif 'state'.in? changed_attributes
-      case self.state
-      when Queued
-        self.running = false
-        self.success = nil
-      when Running
-        if !self.is_locked_by_uuid
-          return false
-        end
-        if !self.started_at
-          self.started_at = Time.now
-        end
-        self.running = true
-        self.success = nil
-      when Cancelled
-        if !self.cancelled_at
-          self.cancelled_at = Time.now
-        end
-        self.running = false
-        self.success = false
-      when Failed
-        if !self.finished_at
-          self.finished_at = Time.now
-        end
-        self.running = false
-        self.success = false
-      when Complete
-        if !self.finished_at
-          self.finished_at = Time.now
-        end
-        self.running = false
-        self.success = true
-      end
-    elsif 'cancelled_at'.in? changed_attributes
-      self.state = Cancelled
+  # Keeps the timestamps and the legacy running/success flags
+  # consistent with +state+.
+  #
+  # Returns nil without doing anything unless state has changed or
+  # this is a new record. Otherwise it fills in started_at,
+  # finished_at or cancelled_at according to the new state (only when
+  # the timestamp is not already set), rewrites running/success to
+  # match the state, sets @need_crunch_dispatch_trigger, and returns
+  # true.
+  def update_timestamps_when_state_changes
+    return if not (state_changed? or new_record?)
+
+    case state
+    when Running
+      self.started_at ||= Time.now
+    when Failed, Complete
+      self.finished_at ||= Time.now
+    when Cancelled
+      self.cancelled_at ||= Time.now
+    end
+
+    # TODO: Remove the following case block when old "success" and
+    # "running" attrs go away. Until then, this ensures we still
+    # expose correct success/running flags to older clients, even if
+    # some new clients are writing only the new state attribute.
+    case state
+    when Queued
+      self.running = false
+      self.success = nil
+    when Running
+      self.running = true
+      self.success = nil
+    when Cancelled, Failed
       self.running = false
       self.success = false
-    elsif 'success'.in? changed_attributes
-      if self.cancelled_at
-        self.state = Cancelled
-        self.running = false
-        self.success = false
-      else
-        if self.success
-          self.state = Complete
-        else
-          self.state = Failed
-        end
-        if !self.finished_at
-          self.finished_at = Time.now
-        end
-        self.running = false
-      end
-    elsif 'running'.in? changed_attributes
-      if self.running
-        self.state = Running
-        if !self.started_at
-          self.started_at = Time.now
-        end
-      else
-        self.state = nil # let set_state_before_save determine what the state should be
-        self.started_at = nil
-      end
+    when Complete
+      self.running = false
+      self.success = true
     end
+    self.running ||= false # Default to false instead of nil.
+
+    @need_crunch_dispatch_trigger = true
+
     true
   end
 
-  def set_state_before_save
-    if !self.state
-      if self.cancelled_at
+  def update_state_from_old_state_attrs
+    # If a client has touched the legacy state attrs, update the
+    # "state" attr to agree with the updated values of the legacy
+    # attrs. The same derivation also runs whenever state is nil.
+    #
+    # Always returns true.
+    #
+    # TODO: Remove this method when old "success" and "running" attrs
+    # go away.
+    if cancelled_at_changed? or
+        success_changed? or
+        running_changed? or
+        state.nil?
+      # Precedence, first match wins: cancelled_at set => Cancelled,
+      # success false => Failed, success true => Complete,
+      # running true => Running, anything else => Queued.
+      if cancelled_at
         self.state = Cancelled
-      elsif self.success
-        self.state = Complete
-      elsif (!self.success.nil? && !self.success)
+      elsif success == false
         self.state = Failed
-      elsif (self.running && self.success.nil? && !self.cancelled_at)
+      elsif success == true
+        self.state = Complete
+      elsif running == true
         self.state = Running
-      elsif !self.started_at && !self.cancelled_at && !self.is_locked_by_uuid && self.success.nil?
+      else
         self.state = Queued
       end
     end
+    true
+  end
+
+  # Validation: state must be one of the values listed in States.
+  # Returns true when it is; otherwise adds an error on :state that
+  # names the offending value and the permitted ones, and returns
+  # false.
+  def validate_status
     if self.state.in?(States)
       true
     else
-      errors.add :state, "'#{state.inspect} must be one of: [#{States.join ', '}]"
+      errors.add :state, "#{state.inspect} must be one of: #{States.inspect}"
       false
     end
   end
 
+  def validate_state_change
+    ok = true
+    if self.state_changed?
+      ok = case self.state_was
+           when nil
+             # state isn't set yet
+             true
+           when Queued
+             # Permit going from queued to any state
+             true
+           when Running
+             # From running, may only transition to a finished state
+             [Complete, Failed, Cancelled].include? self.state
+           when Complete, Failed, Cancelled
+             # Once in a finished state, don't permit any more state changes
+             false
+           else
+             # Any other state transition is also invalid
+             false
+           end
+      if not ok
+        errors.add :state, "invalid change from #{self.state_was} to #{self.state}"
+      end
+    end
+    ok
+  end
+
+  def ensure_no_collection_uuids_in_script_params
+    # recursive_hash_search searches recursively through hashes and
+    # arrays in 'thing' for string fields matching regular expression
+    # 'pattern'.  Returns true if pattern is found, false otherwise.
+    def recursive_hash_search thing, pattern
+      if thing.is_a? Hash
+        thing.each do |k, v|
+          return true if recursive_hash_search v, pattern
+        end
+      elsif thing.is_a? Array
+        thing.each do |k|
+          return true if recursive_hash_search k, pattern
+        end
+      elsif thing.is_a? String
+        return true if thing.match pattern
+      end
+      false
+    end
+
+    # Fail validation if any script_parameters field includes a string containing a
+    # collection uuid pattern.
+    if self.script_parameters_changed?
+      if recursive_hash_search(self.script_parameters, Collection.uuid_regex)
+        self.errors.add :script_parameters, "must use portable_data_hash instead of collection uuid"
+        return false
+      end
+    end
+    true
+  end
 end