3 Copyright (C) The Arvados Authors. All rights reserved.
5 SPDX-License-Identifier: CC-BY-SA-3.0
# Directory holding node data; this file reads "arv-ping-url" and the
# "meta-data" directory from it and writes "pong.json" into it.
18 @@NODEDATA_DIR = "/var/tmp/arv-node-data"
# Puppet configuration file that this script parses and rewrites.
19 @@PUPPET_CONFFILE = "/etc/puppet/puppet.conf"
# JSON file holding the host state kept between runs (see update_host_state).
20 @@HOST_STATEFILE = "/var/run/arvados-compute-ping-hoststate.json"
# Set up logging and load the saved host state.
#
# args           - argument list; the visible code only checks whether the
#                  first element is "quiet".
# stdout, stderr - not referenced in the visible lines; presumably stored in
#                  @stdout/@stderr -- TODO confirm.
#
# NOTE(review): this excerpt skips some original lines (the Syslog.open call
# below is never closed here), so the method is incomplete as shown.
22 def initialize(args, stdout, stderr)
# "quiet" restricts stderr echo to LOG_ERR; otherwise the threshold is
# LOG_DEBUG. The threshold is compared against the level in log.
25 @stderr_loglevel = ((args.first == "quiet") ?
26 Syslog::LOG_ERR : Syslog::LOG_DEBUG)
27 @puppet_disabled = false
28 @syslog = Syslog.open("arvados-compute-ping",
29 Syslog::LOG_CONS | Syslog::LOG_PID,
# The presence of this marker file switches off the Puppet-related steps
# guarded by @puppetless throughout the file.
31 @puppetless = File.exist?('/compute-node.puppetless')
35 load_puppet_conf unless @puppetless
# Host state saved by an earlier run, parsed from the JSON state file.
37 @host_state = JSON.parse(IO.read(@@HOST_STATEFILE))
# Fragment of the main ping-handling logic. The enclosing `def` and several
# intermediate lines are not visible in this excerpt, so the comments below
# describe only the visible statements.
#
# Proceed only when the pong carries hostname, domain and first_ping_at.
50 if pong["hostname"] and pong["domain"] and pong["first_ping_at"]
# Local FQDN lookup; any error raised by the lookup is rescued to nil.
53 "fqdn" => (Socket.gethostbyname(Socket.gethostname).first rescue nil),
55 ["busy", "idle"].include?(pong["crunch_worker_state"]),
60 if hostname_changed?(pong)
61 disable_puppet unless @puppetless
# Record the new FQDN and clear resumed_slurm, which is the flag tested by
# the `unless` below.
63 update_host_state("fqdn" => fqdn_from_pong(pong),
64 "resumed_slurm" => false)
# While resumed_slurm is unset: run Puppet (unless puppetless), resume the
# node in SLURM, then record that it was done.
67 unless @host_state["resumed_slurm"]
68 run_puppet_agent unless @puppetless
69 resume_slurm_node(pong["hostname"])
70 update_host_state("resumed_slurm" => true)
74 log("Last ping at #{pong['last_ping_at']}")
# Re-enable Puppet only if @puppet_disabled is set (and not puppetless).
78 enable_puppet if @puppet_disabled and not @puppetless
# Send message to syslog and, when severe enough, echo it to stderr with a
# timestamp prefix.
#
# message - text to log.
# level   - Syslog priority (default Syslog::LOG_INFO). The message is also
#           written to @stderr when level <= @stderr_loglevel.
#
# Returns nil when nothing was written to stderr, otherwise the result of
# @stderr.write.
def log(message, level=Syslog::LOG_INFO)
  # Syslog#log interprets its second argument as a printf-style format
  # string. Pass the message as data so a literal "%" in it can neither
  # raise ArgumentError nor be expanded.
  @syslog.log(level, "%s", message)
  return unless level <= @stderr_loglevel

  timestamp = Time.now.strftime("%Y-%m-%d %H:%M:%S")
  @stderr.write("#{timestamp} #{message}\n")
end
# Log message at LOG_ERR severity.
#
# message - text describing the failure.
# code    - defaults to 1; not used in the visible lines.
#
# NOTE(review): the rest of the method is not visible in this excerpt;
# presumably it exits the process with `code` -- TODO confirm.
91 def abort(message, code=1)
92 log(message, Syslog::LOG_ERR)
# Run cmd_a through IO.popen in read mode, handing the pipe to the given
# block, then abort if the child's exit status is not in accept_codes.
#
# cmd_a        - command as an argument array.
# accept_codes - anything responding to include?; callers in this file pass
#                arrays and a range.
# io_opts      - options hash passed through to IO.popen.
#
# NOTE(review): the method's tail is not visible in this excerpt; `result`
# is presumably what gets returned -- TODO confirm.
96 def run_and_check(cmd_a, accept_codes, io_opts, &block)
97 result = IO.popen(cmd_a, "r", io_opts, &block)
98 unless accept_codes.include?($?.exitstatus)
99 abort("#{cmd_a} exited #{$?.exitstatus}")
# Exit statuses accepted when a caller does not pass accept_codes.
104 DEFAULT_ACCEPT_CODES=[0]
105 def check_output(cmd_a, accept_codes=DEFAULT_ACCEPT_CODES, io_opts={})
106 # Run a command, check the exit status, and return its stdout as a string.
# Delegates to run_and_check, which performs the exit-status check.
# NOTE(review): the block body that reads from `pipe` is not visible in this
# excerpt.
107 run_and_check(cmd_a, accept_codes, io_opts) do |pipe|
112 def check_command(cmd_a, accept_codes=DEFAULT_ACCEPT_CODES, io_opts={})
113 # Run a command, send stdout to syslog, and check the exit status.
# Delegates to run_and_check, which performs the exit-status check.
114 run_and_check(cmd_a, accept_codes, io_opts) do |pipe|
115 pipe.each_line do |line|
# Each non-empty line is logged prefixed with the command name (cmd_a.first).
# NOTE(review): a line between each_line and this log call is missing from
# the excerpt; presumably it strips the trailing newline -- TODO confirm.
117 log("#{cmd_a.first}: #{line}") unless line.empty?
# Overwrite the file at path with body, creating it if needed and
# truncating any previous content.
#
# path - filesystem path to write.
# body - content to write.
#
# Returns the number of bytes written.
def replace_file(path, body)
  # File.open, unlike Kernel#open, always treats path as a file name; a
  # leading "|" is never interpreted as a command to spawn.
  File.open(path, "w") { |f| f.write(body) }
end
# Merge updates_h into the in-memory host state and persist the whole state
# as JSON to the host state file.
#
# updates_h - Hash of keys to set or overwrite in @host_state.
def update_host_state(updates_h)
  @host_state.merge!(updates_h)
  replace_file(@@HOST_STATEFILE, @host_state.to_json)
end
# Fragment, presumably the body of disable_puppet (called elsewhere in this
# file); the `def` line is not visible in this excerpt.
132 check_command(["puppet", "agent", "--disable"])
# Remembered so the caller can re-enable Puppet later.
133 @puppet_disabled = true
135 # Wait for any running puppet agents to finish.
# pgrep exit statuses 0 and 1 are both accepted; status 1 ends the wait.
# NOTE(review): the enclosing loop and any delay between polls are not
# visible in this excerpt.
136 check_output(["pgrep", "puppet"], 0..1)
137 break if $?.exitstatus == 1
# Fragment, presumably the body of enable_puppet (called elsewhere in this
# file); the `def` line is not visible in this excerpt.
143 check_command(["puppet", "agent", "--enable"])
# Clear the flag that is set when the agent is disabled.
144 @puppet_disabled = false
# Fragment: builds the ping request from files under @@NODEDATA_DIR. The
# enclosing `def` and the begin/rescue lines are not visible in this excerpt.
149 ping_uri_s = File.read(File.join(@@NODEDATA_DIR, "arv-ping-url"))
# NOTE(review): presumably reached when reading the URL file fails; the
# rescue clause is not visible -- TODO confirm.
151 abort("ping URL file is not present yet, skipping run")
154 ping_uri = URI.parse(ping_uri_s)
# The form payload starts from the query parameters of the ping URL.
155 payload_h = CGI.parse(ping_uri.query)
157 # Collect all extra data to be sent
158 dirname = File.join(@@NODEDATA_DIR, "meta-data")
159 Dir.open(dirname).each do |basename|
160 filename = File.join(dirname, basename)
161 if File.file?(filename)
# Key: file name with "-" replaced by "_". Value: file content with one
# trailing newline removed.
162 payload_h[basename.gsub('-', '_')] = File.read(filename).chomp
167 @ping_req = Net::HTTP::Post.new(ping_uri.to_s)
168 @ping_req.set_form_data(payload_h)
169 @ping_client = Net::HTTP.new(ping_uri.host, ping_uri.port)
# SSL is used only when the ping URL scheme is https.
170 @ping_client.use_ssl = ping_uri.scheme == 'https'
# Fragment: sends the prepared request and handles the response. The
# enclosing `def`, the `begin`/`else` lines and one `rescue` line are not
# visible in this excerpt.
175 response = @ping_client.start do |http|
176 http.request(@ping_req)
178 if response.is_a? Net::HTTPSuccess
179 pong = JSON.parse(response.body)
# NOTE(review): presumably the non-success branch; the raised error is
# likely handled by the generic "Error sending ping" abort below -- TODO
# confirm.
181 raise "response was a #{response}"
183 rescue JSON::ParserError => error
184 abort("Error sending ping: could not parse JSON response: #{error}")
186 abort("Error sending ping: #{error}")
# Keep the raw response body on disk as pong.json under @@NODEDATA_DIR.
189 replace_file(File.join(@@NODEDATA_DIR, "pong.json"), response.body)
190 if pong["errors"] then
191 log(pong["errors"].join("; "), Syslog::LOG_ERR)
# NOTE(review): the action taken on an incorrect ping_secret is not visible
# in this excerpt.
192 if pong["errors"].grep(/Incorrect ping_secret/).any?
201 # Parse Puppet configuration suitable for rewriting.
202 # Save certnames in @puppet_certnames.
203 # Save other functional configuration lines in @puppet_conf.
# NOTE(review): the `def` line, the condition of the first branch and the
# body of the `elsif` branch are not visible in this excerpt.
205 @puppet_certnames = []
206 open(@@PUPPET_CONFFILE, "r") do |conffile|
207 conffile.each_line do |line|
# Split the stripped line at the first "=" (surrounding whitespace dropped)
# into key and value; value is nil when the line has no "=".
208 key, value = line.strip.split(/\s*=\s*/, 2)
210 @puppet_certnames << value
# This branch applies only to lines whose key is non-empty and does not
# start with "#".
211 elsif not (key.nil? or key.empty? or key.start_with?("#"))
# Build the fully qualified domain name announced by a pong.
#
# pong - Hash with "hostname" and "domain" entries.
#
# Returns "<hostname>.<domain>" as a String.
def fqdn_from_pong(pong)
  "#{pong['hostname']}.#{pong['domain']}"
end
# Derive the Puppet certname for the node described by a pong.
#
# pong - Hash with "hostname", "domain" and "first_ping_at" (String) entries.
#
# Returns "<timestamp>.<hostname>.compute.<domain>", where the timestamp is
# first_ping_at lower-cased with every ":" replaced by "-".
def certname_from_pong(pong)
  # Insert "compute" as a label right after the host part (first "." only).
  fqdn = fqdn_from_pong(pong).sub(".", ".compute.")
  timestamp = pong['first_ping_at'].tr(':', '-').downcase
  "#{timestamp}.#{fqdn}"
end
# Report whether the host identity implied by pong differs from the saved
# state.
#
# NOTE(review): the `if`/`else` lines are missing from this excerpt.
# Presumably the first comparison applies when @puppetless is set and the
# second otherwise -- TODO confirm.
227 def hostname_changed?(pong)
# Comparison on the saved FQDN only.
229 (@host_state["fqdn"] != fqdn_from_pong(pong))
# Saved FQDN differs, or the Puppet certnames are not exactly the single
# certname derived from pong.
231 (@host_state["fqdn"] != fqdn_from_pong(pong)) or
232 (@puppet_certnames != [certname_from_pong(pong)])
# Rename this host to the FQDN derived from pong: /etc/hostname, the running
# hostname, /etc/hosts, and the Puppet certname.
#
# NOTE(review): some original lines are missing from this excerpt (possibly
# a puppetless guard around the Puppet steps) -- TODO confirm.
236 def rename_host(pong)
237 new_fqdn = fqdn_from_pong(pong)
238 log("Renaming host from #{@host_state["fqdn"]} to #{new_fqdn}")
# /etc/hostname receives only the first label of the FQDN.
240 replace_file("/etc/hostname", "#{new_fqdn.split('.', 2).first}\n")
241 check_output(["hostname", new_fqdn])
243 ip_address = check_output(["facter", "ipaddress"]).chomp
# The address is regex-escaped for the sed pattern; the replacement side
# uses it unescaped.
244 esc_address = Regexp.escape(ip_address)
# Rewrite every /etc/hosts line that starts with the address as
# "<address>\t<fqdn>".
# NOTE(review): the pattern has no delimiter after the address, so 10.0.0.1
# would also match a line for 10.0.0.10.
245 check_command(["sed", "-i", "/etc/hosts",
246 "-e", "s/^#{esc_address}.*$/#{ip_address}\t#{new_fqdn}/"])
# Rewrite the Puppet config as the saved lines plus an [agent] section that
# carries the new certname.
249 new_conflines = @puppet_conf + ["\n[agent]\n",
250 "certname=#{certname_from_pong(pong)}\n"]
251 replace_file(@@PUPPET_CONFFILE, new_conflines.join(""))
# Remove the Puppet SSL directory.
252 FileUtils.remove_entry_secure("/var/lib/puppet/ssl")
# Fragment, presumably the body of run_puppet_agent (called elsewhere in
# this file); the `def` line is not visible in this excerpt.
257 log("Running puppet agent")
# One-shot foreground agent run. Exit statuses 0 and 2 are accepted, and the
# child's stderr is redirected to its stdout so check_command logs both.
259 check_command(["puppet", "agent", "--onetime", "--no-daemonize",
260 "--no-splay", "--detailed-exitcodes",
261 "--ignorecache", "--no-usecacheonfailure"],
262 [0, 2], {err: [:child, :out]})
# Ask SLURM to resume node_name if sinfo reports it as down or draining.
#
# node_name - SLURM node name to query and, if needed, resume.
#
# Does nothing when the reported state is anything other than "down",
# "drain" or "drng".
def resume_slurm_node(node_name)
  current_state = check_output(["sinfo", "--noheader", "-o", "%t",
                                "-n", node_name]).chomp
  return unless %w[down drain drng].include?(current_state)

  log("Resuming node in SLURM")
  # stderr is folded into stdout so check_command logs scontrol's errors too.
  check_command(["scontrol", "update", "NodeName=#{node_name}",
                 "State=RESUME"], [0], {err: [:child, :out]})
end
# Directory created at start-up and removed at the end of the run.
# NOTE(review): presumably a lock against concurrent runs, since Dir.mkdir
# raises when the directory already exists. The begin/rescue/ensure lines
# are not visible in this excerpt -- TODO confirm.
276 LOCK_DIRNAME = "/var/lock/arvados-compute-node.lock"
278 Dir.mkdir(LOCK_DIRNAME)
285 ping_sender = ComputeNodePing.new(ARGV, $stdout, $stderr)
# Remove the lock directory, then call cleanup if the pinger was constructed.
288 Dir.rmdir(LOCK_DIRNAME)
289 ping_sender.cleanup unless ping_sender.nil?