class Commit < ActiveRecord::Base
- require 'shellwords'
+ extend CurrentApiClient
- # Make sure the specified commit really exists, and return the full
- # sha1 commit hash.
- #
- # Accepts anything "git rev-list" accepts, optionally (and
- # preferably) preceded by "repo_name:".
- #
- # Examples: "1234567", "master", "apps:1234567", "apps:master",
- # "apps:HEAD"
-
- # def self.find_by_commit_ish(commit_ish)
- # if only_valid_chars.match(commit_ish)
- # logger.warn "find_by_commit_ish called with string containing invalid characters: '#{commit_ish}'"
- # return nil
- # end
-
- # want_repo = nil
- # if commit_ish.index(':')
- # want_repo, commit_ish = commit_ish.split(':',2)
- # end
- # repositories.each do |repo_name, repo|
- # next if want_repo and want_repo != repo_name
- # ENV['GIT_DIR'] = repo[:git_dir]
- # # we're passing user input to a command line, this is a potential a security hole but I am reasonably confident that shellescape sanitizes the input adequately
- # IO.foreach("|git rev-list --max-count=1 --format=oneline 'origin/'#{commit_ish.shellescape} 2>/dev/null || git rev-list --max-count=1 --format=oneline ''#{commit_ish.shellescape}") do |line|
- # sha1, message = line.strip.split " ", 2
- # next if sha1.length != 40
- # begin
- # Commit.find_or_create_by_repository_name_and_sha1_and_message(repo_name, sha1, message[0..254])
- # rescue
- # logger.warn "find_or_create failed: repo_name #{repo_name} sha1 #{sha1} message #{message[0..254]}"
- # # Ignore cache failure. Commit is real. We should proceed.
- # end
- # return sha1
- # end
- # end
- # nil
- # end
-
- def self.find_commit_range(current_user, repository, minimum, maximum, exclude)
- only_valid_chars = /[^A-Za-z0-9_-]/
- if only_valid_chars.match(minimum) || only_valid_chars.match(maximum)
- logger.warn "find_commit_range called with string containing invalid characters: '#{minimum}', '#{maximum}'"
- return nil
+ # Error raised when a git command run by this class exits with a
+ # non-zero status (see must_pipe).
+ class GitError < StandardError
+ # Numeric status associated with this error. Presumably used as the
+ # HTTP response status by the API error handler -- TODO confirm.
+ def http_status
+ 422
end
+ end
- if minimum and minimum.empty?
- minimum = nil
+ # Return true if e is accepted by `git check-ref-format
+ # --allow-onelevel`, false otherwise. nil, empty strings, and strings
+ # starting with '-' or '$' are rejected without running git.
+ def self.git_check_ref_format(e)
+ if e and !e.empty? and e[0] != '-' and e[0] != '$'
+ `git check-ref-format --allow-onelevel #{e.shellescape}`
+ $?.success?
+ else
+ # definitely not valid
+ false
end
-
- if !maximum
- maximum = "HEAD"
+ end
+
+ # Return an array of commits (each a 40-char sha1) satisfying the
+ # given criteria.
+ #
+ # Return [] if the revisions given in minimum/maximum are invalid or
+ # don't exist in the given repository, or if any entry of exclude is
+ # not a valid revision name.
+ #
+ # Raise ArgumentError if the given repository is not found, is
+ # ambiguous, or is not a usable git URL; raise GitError if a remote
+ # repository cannot be fetched. (Any transient error that prevents
+ # commit ranges from resolving must raise rather than returning an
+ # empty array.)
+ #
+ # repository can be the name of a locally hosted repository or a git
+ # URL (see git-fetch(1)). Currently http, https, and git schemes are
+ # supported.
+ #
+ # A nil maximum defaults to "HEAD". With a nil or empty minimum the
+ # result is just the maximum commit; otherwise it is the output of
+ # `git rev-list min..max` followed by the minimum commit itself.
+ # exclude is nil or a list of revisions whose resolved commits are
+ # omitted from the result.
+ #
+ # NOTE(review): this sets ENV['GIT_DIR'] for the whole process and
+ # does not restore it afterwards.
+ def self.find_commit_range repository, minimum, maximum, exclude
+ if minimum and minimum.empty?
+ minimum = nil
end
- # Get list of actual repository directories under management
- on_disk_repos = repositories
+ if minimum and !git_check_ref_format(minimum)
+ logger.warn "find_commit_range called with invalid minimum revision: '#{minimum}'"
+ return []
+ end
- # Get list of repository objects readable by user
- readable = Repository.readable_by(current_user)
+ if maximum and !git_check_ref_format(maximum)
+ logger.warn "find_commit_range called with invalid maximum revision: '#{maximum}'"
+ return []
+ end
- # filter repository objects on requested repository name
- if repository
- readable = readable.where(name: repository)
+ if !maximum
+ maximum = "HEAD"
end
- #puts "min #{minimum}"
- #puts "max #{maximum}"
- #puts "rep #{repository}"
+ gitdir, is_remote = git_dir_for repository
+ fetch_remote_repository gitdir, repository if is_remote
+ ENV['GIT_DIR'] = gitdir
commits = []
- readable.each do |r|
- if on_disk_repos[r.name]
- ENV['GIT_DIR'] = on_disk_repos[r.name][:git_dir]
-
- #puts "dir #{on_disk_repos[r.name][:git_dir]}"
- # We've filtered for invalid characters, so we can pass the contents of
- # minimum and maximum safely on the command line
-
- #puts "git rev-list --max-count=1 #{maximum}"
+ # Get the commit hash for the upper bound
+ max_hash = nil
+ git_max_hash_cmd = "git rev-list --max-count=1 #{maximum.shellescape} --"
+ IO.foreach("|#{git_max_hash_cmd}") do |line|
+ max_hash = line.strip
+ end
- # Get the commit hash for the upper bound
- max_hash = nil
- IO.foreach("|git rev-list --max-count=1 #{maximum}") do |line|
- max_hash = line.strip
- end
+ # If not found, nothing else to do
+ if !max_hash
+ logger.warn "no refs found looking for max_hash: `GIT_DIR=#{gitdir} #{git_max_hash_cmd}` returned no output"
+ return []
+ end
- # If not found, nothing else to do
- next if !max_hash
+ # If string is invalid, nothing else to do
+ if !git_check_ref_format(max_hash)
+ logger.warn "ref returned by `GIT_DIR=#{gitdir} #{git_max_hash_cmd}` was invalid for max_hash: #{max_hash}"
+ return []
+ end
- resolved_exclude = nil
- if exclude
- resolved_exclude = []
- exclude.each do |e|
- IO.foreach("|git rev-list --max-count=1 #{e}") do |line|
- resolved_exclude.push(line.strip)
- end
+ resolved_exclude = nil
+ if exclude
+ resolved_exclude = []
+ exclude.each do |e|
+ if git_check_ref_format(e)
+ IO.foreach("|git rev-list --max-count=1 #{e.shellescape} --") do |line|
+ resolved_exclude.push(line.strip)
end
+ else
+ # Report the single offending entry, not the whole exclude list.
+ logger.warn "find_commit_range called with invalid exclude revision: '#{e}'"
+ return []
end
+ end
+ end
- if minimum
- # Get the commit hash for the lower bound
- min_hash = nil
- IO.foreach("|git rev-list --max-count=1 #{minimum}") do |line|
- min_hash = line.strip
- end
+ if minimum
+ # Get the commit hash for the lower bound
+ min_hash = nil
+ git_min_hash_cmd = "git rev-list --max-count=1 #{minimum.shellescape} --"
+ IO.foreach("|#{git_min_hash_cmd}") do |line|
+ min_hash = line.strip
+ end
- # If not found, nothing else to do
- next if !min_hash
-
- # Now find all commits between them
- #puts "git rev-list #{min_hash}..#{max_hash}"
- IO.foreach("|git rev-list #{min_hash}..#{max_hash}") do |line|
- hash = line.strip
- commits.push(hash) if !resolved_exclude or !resolved_exclude.include? hash
- end
+ # If not found, nothing else to do
+ if !min_hash
+ logger.warn "no refs found looking for min_hash: `GIT_DIR=#{gitdir} #{git_min_hash_cmd}` returned no output"
+ return []
+ end
- commits.push(min_hash) if !resolved_exclude or !resolved_exclude.include? min_hash
- else
- commits.push(max_hash) if !resolved_exclude or !resolved_exclude.include? max_hash
- end
+ # If string is invalid, nothing else to do
+ if !git_check_ref_format(min_hash)
+ logger.warn "ref returned by `GIT_DIR=#{gitdir} #{git_min_hash_cmd}` was invalid for min_hash: #{min_hash}"
+ return []
+ end
+
+ # Now find all commits between them
+ IO.foreach("|git rev-list #{min_hash.shellescape}..#{max_hash.shellescape} --") do |line|
+ hash = line.strip
+ commits.push(hash) if !resolved_exclude or !resolved_exclude.include? hash
end
- end
- if !commits or commits.empty?
- nil
+ commits.push(min_hash) if !resolved_exclude or !resolved_exclude.include? min_hash
else
- commits
+ commits.push(max_hash) if !resolved_exclude or !resolved_exclude.include? max_hash
end
+
+ commits
end
- # Import all commits from configured git directory into the commits
- # database.
-
- def self.import_all
- repositories.each do |repo_name, repo|
- stat = { true => 0, false => 0 }
- ENV['GIT_DIR'] = repo[:git_dir]
- IO.foreach("|git rev-list --format=oneline --all") do |line|
- sha1, message = line.strip.split " ", 2
- imported = false
- Commit.find_or_create_by_repository_name_and_sha1_and_message(repo_name, sha1, message[0..254]) do
- imported = true
- end
- stat[!!imported] += 1
- if (stat[true] + stat[false]) % 100 == 0
- if $stdout.tty? or ARGV[0] == '-v'
- puts "#{$0} #{$$}: repo #{repo_name} add #{stat[true]} skip #{stat[false]}"
- end
- end
- end
- if $stdout.tty? or ARGV[0] == '-v'
- puts "#{$0} #{$$}: repo #{repo_name} add #{stat[true]} skip #{stat[false]}"
- end
+ # Given a repository (url, or name of hosted repo) and commit sha1,
+ # copy the commit into the internal git repo and tag it with the
+ # given tag (typically a job UUID).
+ #
+ # The repo can be a remote url, but in this case sha1 must already
+ # be present in our local cache for that repo: e.g., sha1 was just
+ # returned by find_commit_range.
+ #
+ # Raises ArgumentError if tag or sha1 is malformed or no local git
+ # dir is known for repo_name; raises GitError if a git command fails.
+ def self.tag_in_internal_repository repo_name, sha1, tag
+ unless git_check_ref_format tag
+ raise ArgumentError.new "invalid tag #{tag}"
+ end
+ # Anchor with \A/\z rather than ^/$: the latter match at any line
+ # boundary, so a multi-line string containing one valid-looking line
+ # would otherwise be accepted.
+ unless /\A[0-9a-f]{40}\z/ =~ sha1
+ raise ArgumentError.new "invalid sha1 #{sha1}"
end
+ src_gitdir, _ = git_dir_for repo_name
+ unless src_gitdir
+ raise ArgumentError.new "no local repository for #{repo_name}"
+ end
+ dst_gitdir = Rails.configuration.git_internal_dir
+ must_pipe("echo #{sha1.shellescape}",
+ "git --git-dir #{src_gitdir.shellescape} pack-objects -q --revs --stdout",
+ "git --git-dir #{dst_gitdir.shellescape} unpack-objects -q")
+ must_git(dst_gitdir,
+ "tag --force #{tag.shellescape} #{sha1.shellescape}")
end
- def self.refresh_repositories
- @repositories = nil
+ protected
+
+ # Return a truthy value if repo_name begins with an http, https, or
+ # git URL scheme, nil otherwise. Anchored with \A so that only the
+ # very start of the string (not the start of any line) is considered.
+ #
+ # NOTE(review): the bare `protected` above does not change the
+ # visibility of methods defined with `def self.` -- confirm intent.
+ def self.remote_url? repo_name
+ /\A(https?|git):\/\// =~ repo_name
end
- protected
+ # Return [local_git_dir, is_remote] for the given repository name or
+ # url. If is_remote, caller must use fetch_remote_repository to ensure
+ # content is up-to-date; nothing is fetched here.
+ #
+ # Raises ArgumentError if repo_name is not a remote url and does not
+ # match exactly one repository readable by current_user.
+ def self.git_dir_for repo_name
+ if remote_url? repo_name
+ return [cache_dir_for(repo_name), true]
+ end
+ repos = Repository.readable_by(current_user).where(name: repo_name)
+ # Evaluate the count once so every branch acts on the same answer.
+ case repos.count
+ when 0
+ raise ArgumentError.new "Repository not found: '#{repo_name}'"
+ when 1
+ return [repos.first.server_path, false]
+ else
+ logger.error "Multiple repositories with name=='#{repo_name}'!"
+ raise ArgumentError.new "Name conflict"
+ end
+ end
+
+ # Return the path of the local cache repository for git_url: an entry
+ # under cache_dir_base named by the SHA1 hex digest of the url plus a
+ # ".git" suffix.
+ def self.cache_dir_for git_url
+ url_digest = Digest::SHA1.hexdigest(git_url)
+ File.join(cache_dir_base, url_digest + ".git").to_s
+ end
- def self.repositories
- return @repositories if @repositories
+ # Base directory (tmp/git under Rails.root) under which cache_dir_for
+ # places its per-url cache repositories.
+ def self.cache_dir_base
+ Rails.root.join 'tmp', 'git'
+ end
- @repositories = {}
- @gitdirbase = Rails.configuration.git_repositories_dir
- Dir.foreach @gitdirbase do |repo|
- next if repo.match /^\./
- git_dir = File.join(@gitdirbase,
- repo.match(/\.git$/) ? repo : File.join(repo, '.git'))
- repo_name = repo.sub(/\.git$/, '')
- @repositories[repo_name] = {git_dir: git_dir}
+ # Bring the cache repository at gitdir up to date with git_url,
+ # creating an empty repository there first if none exists yet.
+ #
+ # Raises ArgumentError if git_url does not start with a url scheme
+ # ("xyz://"); raises GitError if a git command fails.
+ def self.fetch_remote_repository gitdir, git_url
+ # Caller decides which protocols are worth using. This is just a
+ # safety check to ensure we never use urls like "--flag" or wander
+ # into git's hardlink features by using bare "/path/foo" instead
+ # of "file:///path/foo".
+ #
+ # \A (not ^) is required here: ^ also matches after a newline, which
+ # would let a value like "--flag\nhttp://..." through.
+ unless /\A[a-z]+:\/\// =~ git_url
+ raise ArgumentError.new "invalid git url #{git_url}"
end
+ begin
+ must_git gitdir, "branch"
+ rescue GitError => e
+ # Match case-insensitively: newer git releases print this message
+ # in lowercase ("not a git repository").
+ raise unless /not a git repository/i =~ e.to_s
+ # OK, this just means we need to create a blank cache repository
+ # before fetching.
+ FileUtils.mkdir_p gitdir
+ must_git gitdir, "init"
+ end
+ must_git(gitdir,
+ "fetch --no-progress --tags --prune --force --update-head-ok #{git_url.shellescape} 'refs/heads/*:refs/heads/*'")
+ end
- @repositories
+ # Run each of cmds as `git --git-dir <gitdir> <cmd>` through
+ # must_pipe, with ARVADOS_API_TOKEN blanked in the environment while
+ # the commands run and restored afterwards, even on failure.
+ def self.must_git gitdir, *cmds
+ # Clear token in case a git helper tries to use it as a password.
+ saved_token = ENV['ARVADOS_API_TOKEN']
+ ENV['ARVADOS_API_TOKEN'] = ''
+ begin
+ git_prefix = "git --git-dir #{gitdir.shellescape}"
+ cmds.each { |subcmd| must_pipe git_prefix + " " + subcmd }
+ ensure
+ ENV['ARVADOS_API_TOKEN'] = saved_token
+ end
+ end
+
+ # Run cmds as a single shell pipeline with stdin from /dev/null and
+ # each command's stderr merged into its stdout. Raises GitError,
+ # including the command line, status, and captured output, if the
+ # pipeline's exit status is non-zero.
+ #
+ # NOTE(review): a shell pipeline reports the status of its last
+ # command, so a failure in an earlier stage may go unnoticed.
+ def self.must_pipe *cmds
+ pipeline = cmds.join(" 2>&1 |") + " 2>&1"
+ output = IO.read("| </dev/null #{pipeline}")
+ unless $?.success?
+ raise GitError.new "#{pipeline}: #{$?}: #{output}"
+ end
end
end