2880: API server saves node statistics from pings.
[arvados.git] / services / api / app / models / node.rb
index 8e17a8765d4aff9d55aea61602559229e15728ed..71d4dea2c0cc815c7b29c30c8d0d7dac40c31cf1 100644 (file)
@@ -1,5 +1,5 @@
 class Node < ArvadosModel
-  include AssignUuid
+  include HasUuid
   include KindAndEtag
   include CommonApiTemplate
   serialize :info, Hash
@@ -8,13 +8,7 @@ class Node < ArvadosModel
 
   MAX_SLOTS = 64
 
-  @@confdir = if Rails.configuration.respond_to? :dnsmasq_conf_dir
-                Rails.configuration.dnsmasq_conf_dir
-              elsif File.exists? '/etc/dnsmasq.d/.'
-                '/etc/dnsmasq.d'
-              else
-                nil
-              end
+  @@confdir = Rails.configuration.dnsmasq_conf_dir
   @@domain = Rails.configuration.compute_node_domain rescue `hostname --domain`.strip
   @@nameservers = Rails.configuration.compute_node_nameservers
 
@@ -25,6 +19,7 @@ class Node < ArvadosModel
     t.add :last_ping_at
     t.add :slot_number
     t.add :status
+    t.add :crunch_worker_state
   end
   api_accessible :superuser, :extend => :user do |t|
     t.add :first_ping_at
@@ -41,6 +36,17 @@ class Node < ArvadosModel
     super || @@domain
   end
 
+  def crunch_worker_state
+    case self.info.andand['slurm_state']
+    when 'alloc', 'comp'
+      'busy'
+    when 'idle'
+      'idle'
+    else
+      'down'
+    end
+  end
+
   def status
     if !self.last_ping_at
       if Time.now - self.created_at > 5.minutes
@@ -58,9 +64,9 @@ class Node < ArvadosModel
   def ping(o)
     raise "must have :ip and :ping_secret" unless o[:ip] and o[:ping_secret]
 
-    if o[:ping_secret] != self.info[:ping_secret]
-      logger.info "Ping: secret mismatch: received \"#{o[:ping_secret]}\" != \"#{self.info[:ping_secret]}\""
-      return nil
+    if o[:ping_secret] != self.info['ping_secret']
+      logger.info "Ping: secret mismatch: received \"#{o[:ping_secret]}\" != \"#{self.info['ping_secret']}\""
+      raise ArvadosModel::UnauthorizedError.new("Incorrect ping_secret")
     end
     self.last_ping_at = Time.now
 
@@ -75,12 +81,16 @@ class Node < ArvadosModel
 
     # Record instance ID if not already known
     if o[:ec2_instance_id]
-      if !self.info[:ec2_instance_id] 
-        self.info[:ec2_instance_id] = o[:ec2_instance_id]
-        `ec2-create-tags #{o[:ec2_instance_id]} --tag 'Name=#{self.uuid}'`
-      elsif self.info[:ec2_instance_id] != o[:ec2_instance_id]
+      if !self.info['ec2_instance_id']
+        self.info['ec2_instance_id'] = o[:ec2_instance_id]
+        if (Rails.configuration.compute_node_ec2_tag_enable rescue true)
+          tag_cmd = ("ec2-create-tags #{o[:ec2_instance_id]} " +
+                     "--tag 'Name=#{self.uuid}'")
+          `#{tag_cmd}`
+        end
+      elsif self.info['ec2_instance_id'] != o[:ec2_instance_id]
         logger.debug "Multiple nodes have credentials for #{self.uuid}"
-        raise "#{self.uuid} is already running at #{self.info[:ec2_instance_id]} so rejecting ping from #{o[:ec2_instance_id]}"
+        raise "#{self.uuid} is already running at #{self.info['ec2_instance_id']} so rejecting ping from #{o[:ec2_instance_id]}"
       end
     end
 
@@ -98,8 +108,19 @@ class Node < ArvadosModel
         raise "No available node slots" if try_slot == MAX_SLOTS
       end while true
       self.hostname = self.class.hostname_for_slot(self.slot_number)
-      if info[:ec2_instance_id]
-        `ec2-create-tags #{self.info[:ec2_instance_id]} --tag 'hostname=#{self.hostname}'`
+      if info['ec2_instance_id']
+        if (Rails.configuration.compute_node_ec2_tag_enable rescue true)
+          `ec2-create-tags #{self.info['ec2_instance_id']} --tag 'hostname=#{self.hostname}'`
+        end
+      end
+    end
+
+    # Record other basic stats
+    ['total_cpu_cores', 'total_ram_mb', 'total_scratch_mb'].each do |key|
+      if value = (o[key] or o[key.to_sym])
+        self.info[key] = value
+      else
+        self.info.delete(key)
       end
     end
 
@@ -107,35 +128,45 @@ class Node < ArvadosModel
   end
 
   def start!(ping_url_method)
-    ensure_permission_to_update
-    ping_url = ping_url_method.call({ uuid: self.uuid, ping_secret: self.info[:ping_secret] })
-    ec2_args = ["--user-data '#{ping_url}'",
-                "-t c1.xlarge -n 1",
-                Rails.configuration.compute_node_ec2run_args,
-                Rails.configuration.compute_node_ami
-               ]
-    ec2run_cmd = ["ec2-run-instances",
-                  "--client-token", self.uuid,
-                  ec2_args].flatten.join(' ')
-    ec2spot_cmd = ["ec2-request-spot-instances",
-                   "-p #{Rails.configuration.compute_node_spot_bid} --type one-time",
-                   ec2_args].flatten.join(' ')
-    self.info[:ec2_run_command] = ec2run_cmd
-    self.info[:ec2_spot_command] = ec2spot_cmd
-    self.info[:ec2_start_command] = ec2spot_cmd
+    ensure_permission_to_save
+    ping_url = ping_url_method.call({ id: self.uuid, ping_secret: self.info['ping_secret'] })
+    if (Rails.configuration.compute_node_ec2run_args and
+        Rails.configuration.compute_node_ami)
+      ec2_args = ["--user-data '#{ping_url}'",
+                  "-t c1.xlarge -n 1",
+                  Rails.configuration.compute_node_ec2run_args,
+                  Rails.configuration.compute_node_ami
+                 ]
+      ec2run_cmd = ["ec2-run-instances",
+                    "--client-token", self.uuid,
+                    ec2_args].flatten.join(' ')
+      ec2spot_cmd = ["ec2-request-spot-instances",
+                     "-p #{Rails.configuration.compute_node_spot_bid} --type one-time",
+                     ec2_args].flatten.join(' ')
+    else
+      ec2run_cmd = ''
+      ec2spot_cmd = ''
+    end
+    self.info['ec2_run_command'] = ec2run_cmd
+    self.info['ec2_spot_command'] = ec2spot_cmd
+    self.info['ec2_start_command'] = ec2spot_cmd
     logger.info "#{self.uuid} ec2_start_command= #{ec2spot_cmd.inspect}"
     result = `#{ec2spot_cmd} 2>&1`
-    self.info[:ec2_start_result] = result
+    self.info['ec2_start_result'] = result
     logger.info "#{self.uuid} ec2_start_result= #{result.inspect}"
     result.match(/INSTANCE\s*(i-[0-9a-f]+)/) do |m|
       instance_id = m[1]
-      self.info[:ec2_instance_id] = instance_id
-      `ec2-create-tags #{instance_id} --tag 'Name=#{self.uuid}'`
+      self.info['ec2_instance_id'] = instance_id
+      if (Rails.configuration.compute_node_ec2_tag_enable rescue true)
+        `ec2-create-tags #{instance_id} --tag 'Name=#{self.uuid}'`
+      end
     end
     result.match(/SPOTINSTANCEREQUEST\s*(sir-[0-9a-f]+)/) do |m|
       sir_id = m[1]
-      self.info[:ec2_sir_id] = sir_id
-      `ec2-create-tags #{sir_id} --tag 'Name=#{self.uuid}'`
+      self.info['ec2_sir_id'] = sir_id
+      if (Rails.configuration.compute_node_ec2_tag_enable rescue true)
+        `ec2-create-tags #{sir_id} --tag 'Name=#{self.uuid}'`
+      end
     end
     self.save!
   end
@@ -143,7 +174,7 @@ class Node < ArvadosModel
   protected
 
   def ensure_ping_secret
-    self.info[:ping_secret] ||= rand(2**256).to_s(36)
+    self.info['ping_secret'] ||= rand(2**256).to_s(36)
   end
 
   def dnsmasq_update