# Directory holding node data: the "arv-ping-url" file and the "meta-data"
# directory are read from it, and "pong.json" is written into it.
13 @@NODEDATA_DIR = "/var/tmp/arv-node-data"
# Puppet configuration file that is parsed at startup and rewritten when the
# host is renamed.
14 @@PUPPET_CONFFILE = "/etc/puppet/puppet.conf"
# JSON file from which @host_state is loaded and to which update_host_state
# writes it back.
15 @@HOST_STATEFILE = "/var/run/arvados-compute-ping-hoststate.json"
# Set up logging, detect puppetless mode and load the persisted host state.
#
# args   - argument list; a first element of "quiet" restricts stderr output
#          to LOG_ERR, anything else allows everything up to LOG_DEBUG.
# stdout - output stream (its use is not visible in this excerpt).
# stderr - stream that log() echoes messages to, presumably stored in @stderr
#          on a line not visible here.
17 def initialize(args, stdout, stderr)
20 @stderr_loglevel = ((args.first == "quiet") ?
21 Syslog::LOG_ERR : Syslog::LOG_DEBUG)
# Tracks whether this process has disabled the puppet agent, so that it is
# only re-enabled when this process was the one that disabled it.
22 @puppet_disabled = false
23 @syslog = Syslog.open("arvados-compute-ping",
24 Syslog::LOG_CONS | Syslog::LOG_PID,
# The presence of this marker file turns off every Puppet-related step.
26 @puppetless = File.exist?('/compute-node.puppetless')
30 load_puppet_conf unless @puppetless
# NOTE(review): any handling of a missing or unparsable state file is not
# visible in this excerpt.
32 @host_state = JSON.parse(IO.read(@@HOST_STATEFILE))
# Fragment of the method that reacts to a ping response ("pong"); the
# enclosing def line is not visible in this excerpt.
# Only act when the pong carries hostname, domain and first_ping_at.
45 if pong["hostname"] and pong["domain"] and pong["first_ping_at"]
# The next two lines are entries of a hash literal whose opening is not
# visible; presumably default host-state values - TODO confirm.
48 "fqdn" => (Socket.gethostbyname(Socket.gethostname).first rescue nil),
50 ["busy", "idle"].include?(pong["crunch_worker_state"]),
# When the assigned name differs from the recorded one, stop puppet first,
# then record the new FQDN and clear the "resumed_slurm" flag so the node is
# resumed again under its new identity.
55 if hostname_changed?(pong)
56 disable_puppet unless @puppetless
58 update_host_state("fqdn" => fqdn_from_pong(pong),
59 "resumed_slurm" => false)
# Run puppet and resume the SLURM node once, then persist that fact.
62 unless @host_state["resumed_slurm"]
63 run_puppet_agent unless @puppetless
64 resume_slurm_node(pong["hostname"])
65 update_host_state("resumed_slurm" => true)
69 log("Last ping at #{pong['last_ping_at']}")
# Re-enable puppet only if this process disabled it.
73 enable_puppet if @puppet_disabled and not @puppetless
# Record +message+ in syslog and, when it is severe enough, on stderr.
#
# message - text to log.
# level   - Syslog priority constant (default Syslog::LOG_INFO). Lower values
#           are more severe; the message is echoed to @stderr only when
#           level <= @stderr_loglevel.
def log(message, level=Syslog::LOG_INFO)
  # Syslog#log interprets its second argument as a printf-style format
  # string. Pass the text as the argument of "%s" so that a literal "%" in
  # command output or error text cannot raise ArgumentError or be mangled.
  @syslog.log(level, "%s", message)
  if level <= @stderr_loglevel
    timestamp = Time.now.strftime("%Y-%m-%d %H:%M:%S")
    @stderr.write("#{timestamp} #{message}\n")
  end
end
# Log +message+ at error priority.
# NOTE(review): +code+ (default 1) is presumably the process exit status, but
# the line that consumes it is not visible in this excerpt - confirm.
86 def abort(message, code=1)
87 log(message, Syslog::LOG_ERR)
# Run the command array +cmd_a+ through IO.popen in read mode, handing the
# pipe to +block+, and abort unless the exit status is in +accept_codes+.
#
# cmd_a        - command and arguments as an array.
# accept_codes - collection of acceptable exit statuses (tested with include?).
# io_opts      - options hash passed through to IO.popen.
91 def run_and_check(cmd_a, accept_codes, io_opts, &block)
92 result = IO.popen(cmd_a, "r", io_opts, &block)
# IO.popen with a block sets $? to the child's status once the block returns.
93 unless accept_codes.include?($?.exitstatus)
94 abort("#{cmd_a} exited #{$?.exitstatus}")
# Exit statuses treated as success when a caller does not supply its own.
# Frozen because the same array object is shared as a default argument.
DEFAULT_ACCEPT_CODES = [0].freeze
# cmd_a        - command and arguments as an array.
# accept_codes - acceptable exit statuses; anything responding to include?
#                works (callers in this file pass arrays and a range).
# io_opts      - options forwarded to IO.popen via run_and_check.
100 def check_output(cmd_a, accept_codes=DEFAULT_ACCEPT_CODES, io_opts={})
101 # Run a command, check the exit status, and return its stdout as a string.
102 run_and_check(cmd_a, accept_codes, io_opts) do |pipe|
# cmd_a        - command and arguments as an array.
# accept_codes - acceptable exit statuses (tested with include?).
# io_opts      - options forwarded to IO.popen via run_and_check.
107 def check_command(cmd_a, accept_codes=DEFAULT_ACCEPT_CODES, io_opts={})
108 # Run a command, send stdout to syslog, and check the exit status.
109 run_and_check(cmd_a, accept_codes, io_opts) do |pipe|
110 pipe.each_line do |line|
# Each logged line is prefixed with the program name (first element of cmd_a).
112 log("#{cmd_a.first}: #{line}") unless line.empty?
# Overwrite the file at +path+ with +body+, creating it when necessary.
#
# path - filesystem path to write.
# body - String content for the file.
#
# Returns the number of bytes written.
def replace_file(path, body)
  # File.open, unlike Kernel#open, never interprets a path that begins with
  # "|" as a command to spawn; the argument is always treated as a file name.
  File.open(path, "w") { |f| f.write(body) }
end
# Fold +updates_h+ into the in-memory host state and persist the merged
# state to the host state file as JSON.
def update_host_state(updates_h)
  @host_state.update(updates_h)
  serialized = @host_state.to_json
  replace_file(@@HOST_STATEFILE, serialized)
end
# Fragment of the puppet-disabling routine (its def line and loop header are
# not visible in this excerpt).
# Disable the agent and remember that this process did so, so it can be
# re-enabled afterwards.
127 check_command(["puppet", "agent", "--disable"])
128 @puppet_disabled = true
130 # Wait for any running puppet agents to finish.
# Exit statuses 0 and 1 are both accepted here so that run_and_check does not
# abort; a status of 1 (pgrep found no match) ends the wait.
131 check_output(["pgrep", "puppet"], 0..1)
132 break if $?.exitstatus == 1
# Fragment of the puppet-enabling routine (def line not visible): re-enable
# the agent and clear the flag set when it was disabled.
138 check_command(["puppet", "agent", "--enable"])
139 @puppet_disabled = false
# Fragment that builds the ping request (def line and the begin/rescue around
# the first read are not visible in this excerpt).
144 ping_uri_s = File.read(File.join(@@NODEDATA_DIR, "arv-ping-url"))
146 abort("ping URL file is not present yet, skipping run")
149 ping_uri = URI.parse(ping_uri_s)
# Seed the form payload with the parameters already present in the URL's
# query string.
150 payload_h = CGI.parse(ping_uri.query)
152 # Collect all extra data to be sent
153 dirname = File.join(@@NODEDATA_DIR, "meta-data")
# NOTE(review): Dir.open is used without a block, so the directory handle is
# not explicitly closed here.
154 Dir.open(dirname).each do |basename|
155 filename = File.join(dirname, basename)
# Only regular files contribute; the payload key is the file name with "-"
# replaced by "_", the value is the file content without its trailing newline.
156 if File.file?(filename)
157 payload_h[basename.gsub('-', '_')] = File.read(filename).chomp
# The request and client are kept in instance variables for the send step.
162 @ping_req = Net::HTTP::Post.new(ping_uri.to_s)
163 @ping_req.set_form_data(payload_h)
164 @ping_client = Net::HTTP.new(ping_uri.host, ping_uri.port)
165 @ping_client.use_ssl = ping_uri.scheme == 'https'
# Fragment that sends the prepared request and interprets the reply (def,
# begin, else and a rescue line are not visible in this excerpt).
170 response = @ping_client.start do |http|
171 http.request(@ping_req)
# Only a successful HTTP response is parsed as JSON; anything else is raised
# as an error.
173 if response.is_a? Net::HTTPSuccess
174 pong = JSON.parse(response.body)
176 raise "response was a #{response}"
178 rescue JSON::ParserError => error
179 abort("Error sending ping: could not parse JSON response: #{error}")
181 abort("Error sending ping: #{error}")
# Keep a copy of the raw response body next to the other node data.
184 replace_file(File.join(@@NODEDATA_DIR, "pong.json"), response.body)
# Errors reported by the server are logged at error priority; a rejected
# ping secret is detected separately (its handling is not visible here).
185 if pong["errors"] then
186 log(pong["errors"].join("; "), Syslog::LOG_ERR)
187 if pong["errors"].grep(/Incorrect ping_secret/).any?
196 # Parse Puppet configuration suitable for rewriting.
197 # Save certnames in @puppet_certnames.
198 # Save other functional configuration lines in @puppet_conf.
200 @puppet_certnames = []
201 open(@@PUPPET_CONFFILE, "r") do |conffile|
202 conffile.each_line do |line|
# Split on the first "=" only, so a value may itself contain "=".
203 key, value = line.strip.split(/\s*=\s*/, 2)
205 @puppet_certnames << value
# Blank lines and lines starting with "#" are skipped by this branch.
206 elsif not (key.nil? or key.empty? or key.start_with?("#"))
# Build the fully qualified domain name from the "hostname" and "domain"
# entries of the ping response, joined by a dot.
def fqdn_from_pong(pong)
  host = pong['hostname']
  domain = pong['domain']
  "#{host}.#{domain}"
end
# Derive the Puppet certname for this node from the ping response: the
# first-ping timestamp (colons turned into dashes, lowercased), followed by
# the FQDN with ".compute" inserted after the host part.
def certname_from_pong(pong)
  compute_fqdn = fqdn_from_pong(pong).sub(".", ".compute.")
  stamp = pong['first_ping_at'].tr(':', '-').downcase
  "#{stamp}.#{compute_fqdn}"
end
# Report whether the identity in +pong+ differs from what this host has
# recorded. Two variants of the comparison are visible: one checks only the
# stored FQDN, the other additionally requires the Puppet certnames to be
# exactly the single certname derived from the pong.
# NOTE(review): the condition choosing between the two is not visible in this
# excerpt; presumably it is @puppetless - confirm.
222 def hostname_changed?(pong)
224 (@host_state["fqdn"] != fqdn_from_pong(pong))
226 (@host_state["fqdn"] != fqdn_from_pong(pong)) or
227 (@puppet_certnames != [certname_from_pong(pong)])
# Rename this machine to the FQDN given by +pong+: update /etc/hostname, the
# running hostname, /etc/hosts and the Puppet configuration.
231 def rename_host(pong)
232 new_fqdn = fqdn_from_pong(pong)
233 log("Renaming host from #{@host_state["fqdn"]} to #{new_fqdn}")
# /etc/hostname receives only the short name (the text before the first dot),
# while the running hostname is set to the full FQDN.
235 replace_file("/etc/hostname", "#{new_fqdn.split('.', 2).first}\n")
236 check_output(["hostname", new_fqdn])
# Rewrite the /etc/hosts line that starts with this machine's IP address so
# that it maps to the new FQDN.
# NOTE(review): the pattern is a prefix match, so an address such as 10.0.0.1
# would also match a line for 10.0.0.10 - consider anchoring on whitespace.
238 ip_address = check_output(["facter", "ipaddress"]).chomp
239 esc_address = Regexp.escape(ip_address)
240 check_command(["sed", "-i", "/etc/hosts",
241 "-e", "s/^#{esc_address}.*$/#{ip_address}\t#{new_fqdn}/"])
# Write the preserved configuration lines back followed by an [agent] section
# carrying the new certname, then delete the existing Puppet SSL directory
# (presumably so certificates are regenerated for the new name - confirm).
244 new_conflines = @puppet_conf + ["\n[agent]\n",
245 "certname=#{certname_from_pong(pong)}\n"]
246 replace_file(@@PUPPET_CONFFILE, new_conflines.join(""))
247 FileUtils.remove_entry_secure("/var/lib/puppet/ssl")
# Fragment of the puppet-agent runner (def line not visible in this excerpt).
252 log("Running puppet agent")
# Exit statuses 0 and 2 are both accepted because --detailed-exitcodes is
# passed; the child's stderr is redirected into stdout so it is logged too.
254 check_command(["puppet", "agent", "--onetime", "--no-daemonize",
255 "--no-splay", "--detailed-exitcodes",
256 "--ignorecache", "--no-usecacheonfailure"],
257 [0, 2], {err: [:child, :out]})
# Return +node_name+ to service in SLURM when sinfo reports it as down or
# drained/draining.
#
# node_name - SLURM node name to inspect and, if needed, resume.
#
# Returns the result of the scontrol invocation, or nil when no resume was
# needed.
def resume_slurm_node(node_name)
  sinfo_out = check_output(["sinfo", "--noheader", "-o", "%t",
                            "-n", node_name])
  # sinfo prints one line per partition containing the node and may append a
  # flag character to the compact state code (for example "down*" for a node
  # that is not responding). Normalize every line before comparing so those
  # cases are not silently skipped.
  states = sinfo_out.each_line.map do |line|
    line.strip.sub(/[^[:alpha:]]+\z/, "")
  end
  if states.any? { |state| %w(down drain drng).include?(state) }
    log("Resuming node in SLURM")
    check_command(["scontrol", "update", "NodeName=#{node_name}",
                   "State=RESUME"], [0], {err: [:child, :out]})
  end
end
# Directory whose creation serves as the lock for this script: Dir.mkdir
# raises if the directory already exists.
# NOTE(review): the handling of that failure is not visible in this excerpt.
271 LOCK_DIRNAME = "/var/lock/arvados-compute-node.lock"
273 Dir.mkdir(LOCK_DIRNAME)
280 ping_sender = ComputeNodePing.new(ARGV, $stdout, $stderr)
# Release the lock, then let the ping sender clean up if it was constructed.
283 Dir.rmdir(LOCK_DIRNAME)
284 ping_sender.cleanup unless ping_sender.nil?