X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/06686c47a0f99f5b5f292cde2d391024bf107514..788ecdf8085f5e69cd3dc960f510b49f11432cb3:/services/api/script/crunch-dispatch.rb diff --git a/services/api/script/crunch-dispatch.rb b/services/api/script/crunch-dispatch.rb index d3147225d6..44ea396dc7 100755 --- a/services/api/script/crunch-dispatch.rb +++ b/services/api/script/crunch-dispatch.rb @@ -1,5 +1,6 @@ #!/usr/bin/env ruby +require 'shellwords' include Process $options = {} @@ -190,6 +191,23 @@ class Dispatcher nodelist end + def fail_job job, message + $stderr.puts "dispatch: #{job.uuid}: #{message}" + begin + Log.new(object_uuid: job.uuid, + event_type: 'dispatch', + owner_uuid: job.owner_uuid, + summary: message, + properties: {"text" => message}).save! + rescue + $stderr.puts "dispatch: log.create failed" + end + job.state = "Failed" + if not job.save + $stderr.puts "dispatch: job.save failed" + end + end + def start_jobs @todo.each do |job| next if @running[job.uuid] @@ -232,12 +250,24 @@ class Dispatcher "GEM_PATH=#{ENV['GEM_PATH']}") end - job_auth = ApiClientAuthorization. - new(user: User.where('uuid=?', job.modified_by_user_uuid).first, - api_client_id: 0) - if not job_auth.save - $stderr.puts "dispatch: job_auth.save failed" - next + @authorizations ||= {} + if @authorizations[job.uuid] and + @authorizations[job.uuid].user.uuid != job.modified_by_user_uuid + # We already made a token for this job, but we need a new one + # because modified_by_user_uuid has changed (the job will run + # as a different user). + @authorizations[job.uuid].update_attributes expires_at: Time.now + @authorizations[job.uuid] = nil + end + if not @authorizations[job.uuid] + auth = ApiClientAuthorization. + new(user: User.where('uuid=?', job.modified_by_user_uuid).first, + api_client_id: 0) + if not auth.save + $stderr.puts "dispatch: auth.save failed" + next + end + @authorizations[job.uuid] = auth end crunch_job_bin = (ENV['CRUNCH_JOB_BIN'] || `which arv-crunch-job`.strip) @@ -245,70 +275,76 @@ class Dispatcher raise "No CRUNCH_JOB_BIN env var, and crunch-job not in path." end - require 'shellwords' - arvados_internal = Rails.configuration.git_internal_dir if not File.exists? arvados_internal $stderr.puts `mkdir -p #{arvados_internal.shellescape} && cd #{arvados_internal.shellescape} && git init --bare` end - repo_root = Rails.configuration.git_repositories_dir - src_repo = File.join(repo_root, job.repository + '.git') - if not File.exists? src_repo - src_repo = File.join(repo_root, job.repository, '.git') + git = "git --git-dir=#{arvados_internal.shellescape}" + + # @fetched_commits[V]==true if we know commit V exists in the + # arvados_internal git repository. + @fetched_commits ||= {} + if !@fetched_commits[job.script_version] + + repo_root = Rails.configuration.git_repositories_dir + src_repo = File.join(repo_root, job.repository + '.git') if not File.exists? src_repo - $stderr.puts "dispatch: No #{job.repository}.git or #{job.repository}/.git at #{repo_root}" - sleep 1 - next + src_repo = File.join(repo_root, job.repository, '.git') + if not File.exists? src_repo + fail_job job, "No #{job.repository}.git or #{job.repository}/.git at #{repo_root}" + next + end end - end - - git = "git --git-dir=#{arvados_internal.shellescape}" - # check if the commit needs to be fetched or not - commit_rev = `#{git} rev-list -n1 #{job.script_version.shellescape} 2>/dev/null`.chomp - unless $? == 0 and commit_rev == job.script_version - # commit does not exist in internal repository, so import the source repository using git fetch-pack - cmd = "#{git} fetch-pack --no-progress --all #{src_repo.shellescape}" - $stderr.puts cmd - $stderr.puts `#{cmd}` - unless $? == 0 - $stderr.puts "dispatch: git fetch-pack failed" - sleep 1 - next + # check if the commit needs to be fetched or not + commit_rev = `#{git} rev-list -n1 #{job.script_version.shellescape} 2>/dev/null`.chomp + unless $? == 0 and commit_rev == job.script_version + # commit does not exist in internal repository, so import the source repository using git fetch-pack + cmd = "#{git} fetch-pack --no-progress --all #{src_repo.shellescape}" + $stderr.puts cmd + $stderr.puts `#{cmd}` + unless $? == 0 + fail_job job, "git fetch-pack failed" + next + end end + @fetched_commits[job.script_version] = true end - # check if the commit needs to be tagged with this job uuid - tag_rev = `#{git} rev-list -n1 #{job.uuid.shellescape} 2>/dev/null`.chomp - if $? != 0 - # no job tag found, so create one - cmd = "#{git} tag #{job.uuid.shellescape} #{job.script_version.shellescape}" - $stderr.puts cmd - $stderr.puts `#{cmd}` - unless $? == 0 - $stderr.puts "dispatch: git tag failed" - sleep 1 - next - end - else - # job tag found, check that it has the expected revision - unless tag_rev == job.script_version - # Uh oh, the tag doesn't point to the revision we were expecting. - # Someone has been monkeying with the job record and/or git. - $stderr.puts "dispatch: Already a tag #{job.script_version} pointing to commit #{tag_rev} but expected commit #{job.script_version}" - job.state = "Failed" - if not job.save - $stderr.puts "dispatch: job.save failed" + # @job_tags[J]==V if we know commit V has been tagged J in the + # arvados_internal repository. (J is a job UUID, V is a commit + # sha1.) + @job_tags ||= {} + if not @job_tags[job.uuid] + # check if the commit needs to be tagged with this job uuid + tag_rev = `#{git} rev-list -n1 #{job.uuid.shellescape} 2>/dev/null`.chomp + if $? != 0 + # no job tag found, so create one + cmd = "#{git} tag #{job.uuid.shellescape} #{job.script_version.shellescape}" + $stderr.puts cmd + $stderr.puts `#{cmd}` + unless $? == 0 + fail_job job, "git tag failed" + next + end + else + # job tag found, check that it has the expected revision + unless tag_rev == job.script_version + # Uh oh, the tag doesn't point to the revision we were expecting. + # Someone has been monkeying with the job record and/or git. + fail_job job, "Existing tag #{job.uuid} points to commit #{tag_rev} but expected commit #{job.script_version}" next end - next end + @job_tags[job.uuid] = job.script_version + elsif @job_tags[job.uuid] != job.script_version + fail_job job, "Existing tag #{job.uuid} points to commit #{@job_tags[job.uuid]} but this job uses commit #{job.script_version}" end cmd_args << crunch_job_bin cmd_args << '--job-api-token' - cmd_args << job_auth.api_token + cmd_args << @authorizations[job.uuid].api_token cmd_args << '--job' cmd_args << job.uuid cmd_args << '--git-dir' @@ -337,7 +373,7 @@ class Dispatcher buf: {stderr: '', stdout: ''}, started: false, sent_int: 0, - job_auth: job_auth, + job_auth: @authorizations[job.uuid], stderr_buf_to_flush: '', stderr_flushed_at: Time.new(0), bytes_logged: 0, @@ -620,7 +656,7 @@ class Dispatcher end else refresh_todo unless did_recently(:refresh_todo, 1.0) - update_node_status + update_node_status unless did_recently(:update_node_status, 1.0) unless @todo.empty? or did_recently(:start_jobs, 1.0) or $signal[:term] start_jobs end