X-Git-Url: https://git.arvados.org/arvados.git/blobdiff_plain/c550609485691d8107ae364bfc982569f81f1725..73872ccc5bb6b80a6049b44b0113085a9c2b6934:/services/api/app/models/commit.rb diff --git a/services/api/app/models/commit.rb b/services/api/app/models/commit.rb index 9e1176b9b3..921c690cd0 100644 --- a/services/api/app/models/commit.rb +++ b/services/api/app/models/commit.rb @@ -1,5 +1,13 @@ +# Copyright (C) The Arvados Authors. All rights reserved. +# +# SPDX-License-Identifier: AGPL-3.0 + +require 'request_error' + class Commit < ActiveRecord::Base - class GitError < StandardError + extend CurrentApiClient + + class GitError < RequestError def http_status 422 end @@ -29,7 +37,7 @@ class Commit < ActiveRecord::Base # repository can be the name of a locally hosted repository or a git # URL (see git-fetch(1)). Currently http, https, and git schemes are # supported. - def self.find_commit_range(current_user, repository, minimum, maximum, exclude) + def self.find_commit_range repository, minimum, maximum, exclude if minimum and minimum.empty? minimum = nil end @@ -56,12 +64,22 @@ class Commit < ActiveRecord::Base # Get the commit hash for the upper bound max_hash = nil - IO.foreach("|git rev-list --max-count=1 #{maximum.shellescape} --") do |line| + git_max_hash_cmd = "git rev-list --max-count=1 #{maximum.shellescape} --" + IO.foreach("|#{git_max_hash_cmd}") do |line| max_hash = line.strip end - # If not found or string is invalid, nothing else to do - return [] if !max_hash or !git_check_ref_format(max_hash) + # If not found, nothing else to do + if !max_hash + logger.warn "no refs found looking for max_hash: `GIT_DIR=#{gitdir} #{git_max_hash_cmd}` returned no output" + return [] + end + + # If string is invalid, nothing else to do + if !git_check_ref_format(max_hash) + logger.warn "ref returned by `GIT_DIR=#{gitdir} #{git_max_hash_cmd}` was invalid for max_hash: #{max_hash}" + return [] + end resolved_exclude = nil if exclude @@ -81,12 +99,22 @@ class Commit < ActiveRecord::Base if minimum # Get the commit hash for the lower bound min_hash = nil - IO.foreach("|git rev-list --max-count=1 #{minimum.shellescape} --") do |line| + git_min_hash_cmd = "git rev-list --max-count=1 #{minimum.shellescape} --" + IO.foreach("|#{git_min_hash_cmd}") do |line| min_hash = line.strip end - # If not found or string is invalid, nothing else to do - return [] if !min_hash or !git_check_ref_format(min_hash) + # If not found, nothing else to do + if !min_hash + logger.warn "no refs found looking for min_hash: `GIT_DIR=#{gitdir} #{git_min_hash_cmd}` returned no output" + return [] + end + + # If string is invalid, nothing else to do + if !git_check_ref_format(min_hash) + logger.warn "ref returned by `GIT_DIR=#{gitdir} #{git_min_hash_cmd}` was invalid for min_hash: #{min_hash}" + return [] + end # Now find all commits between them IO.foreach("|git rev-list #{min_hash.shellescape}..#{max_hash.shellescape} --") do |line| @@ -103,32 +131,69 @@ class Commit < ActiveRecord::Base end # Given a repository (url, or name of hosted repo) and commit sha1, - # copy the commit into the internal git repo and tag it with the - # given tag (typically a job UUID). + # copy the commit into the internal git repo (if necessary), and tag + # it with the given tag (typically a job UUID). # # The repo can be a remote url, but in this case sha1 must already # be present in our local cache for that repo: e.g., sha1 was just # returned by find_commit_range. - def self.tag_in_internal_repository repo, sha1, tag + def self.tag_in_internal_repository repo_name, sha1, tag unless git_check_ref_format tag raise ArgumentError.new "invalid tag #{tag}" end unless /^[0-9a-f]{40}$/ =~ sha1 raise ArgumentError.new "invalid sha1 #{sha1}" end - src_gitdir, _ = git_dir_for repo + src_gitdir, _ = git_dir_for repo_name + unless src_gitdir + raise ArgumentError.new "no local repository for #{repo_name}" + end dst_gitdir = Rails.configuration.git_internal_dir - must_pipe("echo #{sha1.shellescape}", - "git --git-dir #{src_gitdir.shellescape} pack-objects -q --revs --stdout", - "git --git-dir #{dst_gitdir.shellescape} unpack-objects -q") - must_git(dst_gitdir, - "tag --force #{tag.shellescape} #{sha1.shellescape}") + + begin + commit_in_dst = must_git(dst_gitdir, "log -n1 --format=%H #{sha1.shellescape}^{commit}").strip + rescue GitError + commit_in_dst = false + end + + tag_cmd = "tag --force #{tag.shellescape} #{sha1.shellescape}^{commit}" + if commit_in_dst == sha1 + must_git(dst_gitdir, tag_cmd) + else + # git-fetch is faster than pack-objects|unpack-objects, but + # git-fetch can't fetch by sha1. So we first try to fetch a + # branch that has the desired commit, and if that fails (there + # is no such branch, or the branch we choose changes under us in + # race), we fall back to pack|unpack. + begin + branches = must_git(src_gitdir, + "branch --contains #{sha1.shellescape}") + m = branches.match(/^. (\w+)\n/) + if !m + raise GitError.new "commit is not on any branch" + end + branch = m[1] + must_git(dst_gitdir, + "fetch file://#{src_gitdir.shellescape} #{branch.shellescape}") + # Even if all of the above steps succeeded, we might still not + # have the right commit due to a race, in which case tag_cmd + # will fail, and we'll need to fall back to pack|unpack. So + # don't be tempted to condense this tag_cmd and the one in the + # rescue block into a single attempt. + must_git(dst_gitdir, tag_cmd) + rescue GitError + must_pipe("echo #{sha1.shellescape}", + "git --git-dir #{src_gitdir.shellescape} pack-objects -q --revs --stdout", + "git --git-dir #{dst_gitdir.shellescape} unpack-objects -q") + must_git(dst_gitdir, tag_cmd) + end + end end protected - def self.remote_url? repository - /^(https?|git):\/\// =~ repository + def self.remote_url? repo_name + /^(https?|git):\/\// =~ repo_name end # Return [local_git_dir, is_remote]. If is_remote, caller must use @@ -152,8 +217,11 @@ class Commit < ActiveRecord::Base end def self.cache_dir_for git_url - Rails.root.join('tmp', 'git', Digest::SHA1.hexdigest(git_url) + ".git"). - to_s + File.join(cache_dir_base, Digest::SHA1.hexdigest(git_url) + ".git").to_s + end + + def self.cache_dir_base + Rails.root.join 'tmp', 'git-cache' end def self.fetch_remote_repository gitdir, git_url @@ -164,9 +232,16 @@ class Commit < ActiveRecord::Base unless /^[a-z]+:\/\// =~ git_url raise ArgumentError.new "invalid git url #{git_url}" end - FileUtils.mkdir_p gitdir + begin + must_git gitdir, "branch" + rescue GitError => e + raise unless /Not a git repository/ =~ e.to_s + # OK, this just means we need to create a blank cache repository + # before fetching. + FileUtils.mkdir_p gitdir + must_git gitdir, "init" + end must_git(gitdir, - "init", "fetch --no-progress --tags --prune --force --update-head-ok #{git_url.shellescape} 'refs/heads/*:refs/heads/*'") end @@ -174,14 +249,16 @@ class Commit < ActiveRecord::Base # Clear token in case a git helper tries to use it as a password. orig_token = ENV['ARVADOS_API_TOKEN'] ENV['ARVADOS_API_TOKEN'] = '' + last_output = '' begin git = "git --git-dir #{gitdir.shellescape}" cmds.each do |cmd| - must_pipe git+" "+cmd + last_output = must_pipe git+" "+cmd end ensure ENV['ARVADOS_API_TOKEN'] = orig_token end + return last_output end def self.must_pipe *cmds @@ -190,5 +267,6 @@ class Commit < ActiveRecord::Base if not $?.success? raise GitError.new "#{cmd}: #{$?}: #{out}" end + return out end end