X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/c4b94d62b9f88adfc317022143954b1f872b3c9e..8f56eb6a78c03ee20c3b2700c9108bd7dd1de168:/app/models/node.rb diff --git a/app/models/node.rb b/app/models/node.rb index 6f73d4abeb..853b2712c7 100644 --- a/app/models/node.rb +++ b/app/models/node.rb @@ -1,15 +1,57 @@ -class Node < ActiveRecord::Base +class Node < OrvosModel include AssignUuid + include KindAndEtag + include CommonApiTemplate serialize :info, Hash before_validation :ensure_ping_secret + after_update :dnsmasq_update MAX_SLOTS = 64 + @@confdir = if Rails.configuration.respond_to? :dnsmasq_conf_dir + Rails.configuration.dnsmasq_conf_dir + elsif File.exists? '/etc/dnsmasq.d/.' + '/etc/dnsmasq.d' + else + nil + end + @@domain = Rails.configuration.compute_node_domain rescue `hostname --domain`.strip + @@nameservers = Rails.configuration.compute_node_nameservers + + api_accessible :superuser, :extend => :common do |t| + t.add :hostname + t.add :domain + t.add :ip_address + t.add :first_ping_at + t.add :last_ping_at + t.add :info + t.add :status + t.add lambda { |x| @@nameservers }, :as => :nameservers + end + def info @info ||= Hash.new super end + def domain + super || @@domain + end + + def status + if !self.last_ping_at + if Time.now - self.created_at > 5.minutes + 'startup-fail' + else + 'pending' + end + elsif Time.now - self.last_ping_at > 1.hours + 'missing' + else + 'running' + end + end + def ping(o) raise "must have :ip and :ping_secret" unless o[:ip] and o[:ping_secret] @@ -19,6 +61,8 @@ class Node < ActiveRecord::Base end self.last_ping_at = Time.now + @bypass_orvos_authorization = true + # Record IP address if self.ip_address.nil? logger.info "#{self.uuid} ip_address= #{o[:ip]}" @@ -27,39 +71,65 @@ class Node < ActiveRecord::Base end # Record instance ID if not already known - self.info[:ec2_instance_id] ||= o[:ec2_instance_id] + if !self.info[:ec2_instance_id] and o[:ec2_instance_id] + self.info[:ec2_instance_id] = o[:ec2_instance_id] + `ec2-create-tags #{self.info[:ec2_instance_id]} --tag 'Name=#{self.uuid}'` + end # Assign hostname if self.slot_number.nil? try_slot = 0 begin self.slot_number = try_slot - try_slot += 1 - break if self.save rescue nil + begin + self.save! + break + rescue ActiveRecord::RecordNotUnique + try_slot += 1 + end raise "No available node slots" if try_slot == MAX_SLOTS end while true - self.hostname = "compute#{self.slot_number}" + self.hostname = self.class.hostname_for_slot(self.slot_number) + if info[:ec2_instance_id] + `ec2-create-tags #{self.info[:ec2_instance_id]} --tag 'hostname=#{self.hostname}'` + end end save end def start!(ping_url_method) + ensure_permission_to_update ping_url = ping_url_method.call({ uuid: self.uuid, ping_secret: self.info[:ping_secret] }) - cmd = ["ec2-run-instances", - "--user-data '#{ping_url}'", - "-t c1.xlarge -n 1 -g orvos-compute", - "ami-68ca6901" - ].join(' ') - self.info[:ec2_start_command] = cmd - logger.info "#{self.uuid} ec2_start_command= #{cmd.inspect}" - result = `#{cmd} 2>&1` + ec2_args = ["--user-data '#{ping_url}'", + "-t c1.xlarge -n 1", + "-g", Rails.configuration.compute_node_security_group, + Rails.configuration.compute_node_ami + ] + ec2run_cmd = ["ec2-run-instances", + "--client-token", self.uuid, + ec2_args].flatten.join(' ') + ec2spot_cmd = ["ec2-request-spot-instances", + "-p #{Rails.configuration.compute_node_spot_bid} --type one-time", + ec2_args].flatten.join(' ') + self.info[:ec2_run_command] = ec2run_cmd + self.info[:ec2_spot_command] = ec2spot_cmd + self.info[:ec2_start_command] = ec2spot_cmd + logger.info "#{self.uuid} ec2_start_command= #{ec2spot_cmd.inspect}" + result = `#{ec2spot_cmd} 2>&1` self.info[:ec2_start_result] = result logger.info "#{self.uuid} ec2_start_result= #{result.inspect}" result.match(/INSTANCE\s*(i-[0-9a-f]+)/) do |m| - self.info[:ec2_instance_id] = m[1] - self.save! + instance_id = m[1] + self.info[:ec2_instance_id] = instance_id + `ec2-create-tags #{instance_id} --tag 'Name=#{self.uuid}'` + end + result.match(/SPOTINSTANCEREQUEST\s*(sir-[0-9a-f]+)/) do |m| + sir_id = m[1] + self.info[:ec2_sir_id] = sir_id + `ec2-create-tags #{sir_id} --tag 'Name=#{self.uuid}'` end + self.save! end protected @@ -67,4 +137,52 @@ class Node < ActiveRecord::Base def ensure_ping_secret self.info[:ping_secret] ||= rand(2**256).to_s(36) end + + def dnsmasq_update + if self.hostname_changed? or self.ip_address_changed? + if self.hostname and self.ip_address + self.class.dnsmasq_update(self.hostname, self.ip_address) + end + end + end + + def self.dnsmasq_update(hostname, ip_address) + return unless @@confdir + ptr_domain = ip_address. + split('.').reverse.join('.').concat('.in-addr.arpa') + hostfile = File.join @@confdir, hostname + File.open hostfile, 'w' do |f| + f.puts "address=/#{hostname}/#{ip_address}" + f.puts "address=/#{hostname}.#{@@domain}/#{ip_address}" if @@domain + f.puts "ptr-record=#{ptr_domain},#{hostname}" + end + File.open(File.join(@@confdir, 'restart.txt'), 'w') do |f| + # this should trigger a dnsmasq restart + end + end + + def self.hostname_for_slot(slot_number) + "compute#{slot_number}" + end + + # At startup, make sure all DNS entries exist. Otherwise, slurmctld + # will refuse to start. + if @@confdir and + !File.exists? (File.join(@@confdir, hostname_for_slot(MAX_SLOTS-1))) + (0..MAX_SLOTS-1).each do |slot_number| + hostname = hostname_for_slot(slot_number) + hostfile = File.join @@confdir, hostname + if !File.exists? hostfile + dnsmasq_update(hostname, '127.40.4.0') + end + end + end + + def permission_to_update + @bypass_orvos_authorization or super + end + + def permission_to_create + current_user and current_user.is_admin + end end