3 # Copyright (C) The Arvados Authors. All rights reserved.
5 # SPDX-License-Identifier: AGPL-3.0
7 # This script can be installed as a git update hook.
9 # It can also be installed as a gitolite 'hooklet' in the
10 # hooks/common/update.secondary.d/ directory.
12 # NOTE: this script runs under the same assumptions as the 'update' hook, so
13 # the starting directory must be maintained and arguments must be passed on.
# Opt-out switch: when the GL_OPTION_SKIP_COPYRIGHT_HEADER_CHECK environment
# variable is present, announce that the check is being skipped.
# NOTE(review): the statements that follow the message are not part of this
# excerpt; presumably the hook exits successfully there -- confirm.
20 if ENV.has_key?('GL_OPTION_SKIP_COPYRIGHT_HEADER_CHECK')
21 puts "Skipping copyright header check..."
# Announce the check, then echo the ref name and the first six characters of the
# old and new revisions. $refname, $oldrev and $newrev are globals assigned
# outside this excerpt (presumably from the update hook's arguments -- confirm).
25 puts "Enforcing copyright headers..."
26 puts "(#{$refname}) (#{$oldrev[0,6]}) (#{$newrev[0,6]})"
# Populate the global $licenseignore with one regex source string per line of
# the `.licenseignore` file as it exists in the pushed revision ($newrev).
#
# The file is read with `git show` (stderr discarded), so it is taken from the
# object database rather than from a working tree. Each line is treated as a
# shell-style glob and rewritten into regex syntax, in this order:
#   `.` -> `\.`   (literal dot; done first so the dots added below stay unescaped)
#   `*` -> `.*`   (any run of characters)
#   `?` -> `.`    (any single character)
# When `git show` prints nothing, the result is an empty list.
# NOTE(review): other regex metacharacters (`+`, `(`, `[`, ...) are passed through
# unescaped, so a pattern containing them is interpreted as regex syntax.
28 def load_licenseignore
29 $licenseignore = `git show #{$newrev}:.licenseignore 2>/dev/null`.gsub(/\./,'\\.').gsub(/\*/,'.*').gsub(/\?/,'.').split("\n")
# Check one file's extracted header text and report it when it is not acceptable.
#
# filename - path of the file, used for the ignore test and in the message.
# header   - text expected to contain the copyright header lines.
# broken   - the caller's running failure flag; callers in this file assign the
#            return value back to their own `broken` variable.
#
# NOTE(review): several statements of this method (initialisation and setting of
# `ignore`, and the value returned after the message) are not part of this
# excerpt; the comments below describe only the visible lines.
32 def check_file(filename, header, broken)
# Test the filename against every pattern from .licenseignore. The pattern is
# interpolated into an unanchored regex, so it may match anywhere in the path.
34 $licenseignore.each do |li|
35 if filename =~ /#{li}/
# Ignored files leave the failure flag exactly as it was passed in.
39 return broken if ignore
# A header is only accepted when it carries an SPDX license identifier tag.
41 if header !~ /SPDX-License-Identifier:/
45 puts "missing or invalid copyright header in file #{filename}"
51 # enforce copyright headers
# Collects the git objects introduced by the push ($oldrev..$newrev on $refname),
# extracts the header of every new file and passes it to check_file, then prints
# a PASS or FAIL summary.
# NOTE(review): this excerpt omits a number of statements of the method (loop
# headers, the assignments of `tmp`, `filename`, `headerCount`, `previousLine`,
# and whatever follows the PASS/FAIL messages); comments below describe only the
# visible lines and hedge where they depend on the missing ones.
52 def check_copyright_headers
# An all-zero new revision means the ref is being removed.
53 if ($newrev[0,6] == '000000')
54 # A branch is being deleted. Do not check old commits for copyright headers!
# An all-zero old revision means the ref did not exist before this push.
57 elsif ($oldrev[0,6] == '000000')
58 if $refname != 'refs/heads/main'
59 # A new branch was pushed. Check all new commits in this branch.
# Echo the blob-listing command for diagnostics.
# NOTE(review): the echoed text omits `--follow-symlinks`, which the command
# actually executed on the next line does pass.
60 puts "git rev-list --objects main..#{$newrev} | git cat-file --batch-check='%(objecttype) %(objectname) %(objectsize) %(rest)'| sed -n 's/^blob //p'"
# Objects reachable from $newrev but not from main, split by object type. After
# sed strips the type, each entry reads "<objectname> <objectsize> <rest>".
61 blob_objects = `git rev-list --objects main..#{$newrev} | git cat-file --follow-symlinks --batch-check='%(objecttype) %(objectname) %(objectsize) %(rest)'| sed -n 's/^blob //p'`.split("\n")
62 commit_objects = `git rev-list --objects main..#{$newrev} | git cat-file --batch-check='%(objecttype) %(objectname) %(objectsize) %(rest)'| sed -n 's/^commit //p'`.split("\n")
63 all_objects = blob_objects + commit_objects
64 commits = `git rev-list main..#{$newrev}`.split("\n")
66 # First push to an empty repository
# Same listing as above, but over the complete history of $newrev.
# NOTE(review): the echoed text again omits `--follow-symlinks`.
67 puts "git rev-list --objects #{$newrev} | git cat-file --batch-check='%(objecttype) %(objectname) %(objectsize) %(rest)'| sed -n 's/^blob //p'"
68 blob_objects = `git rev-list --objects #{$newrev} | git cat-file --follow-symlinks --batch-check='%(objecttype) %(objectname) %(objectsize) %(rest)'| sed -n 's/^blob //p'`.split("\n")
69 commit_objects = `git rev-list --objects #{$newrev} | git cat-file --batch-check='%(objecttype) %(objectname) %(objectsize) %(rest)'| sed -n 's/^commit //p'`.split("\n")
70 all_objects = blob_objects + commit_objects
71 commits = `git rev-list #{$newrev}`.split("\n")
# Update of an existing ref: consider the objects in $oldrev..$newrev.
# NOTE(review): only the blob listing excludes objects already reachable from
# existing branches (`--not --branches='*'`); the commit listing and `commits`
# do not -- confirm that this asymmetry is intended.
74 blob_objects = `git rev-list --objects #{$oldrev}..#{$newrev} --not --branches='*' | git cat-file --follow-symlinks --batch-check='%(objecttype) %(objectname) %(objectsize) %(rest)'| sed -n 's/^blob //p'`.split("\n")
75 commit_objects = `git rev-list --objects #{$oldrev}..#{$newrev} | git cat-file --batch-check='%(objecttype) %(objectname) %(objectsize) %(rest)'| sed -n 's/^commit //p'`.split("\n")
76 all_objects = blob_objects + commit_objects
77 commits = `git rev-list #{$oldrev}..#{$newrev}`.split("\n")
# Examine every collected object. `tmp` is assigned in statements missing from
# this excerpt; presumably it is the entry split into fields, with tmp[0] being
# the object name -- confirm.
82 all_objects.each do |rev|
86 # git object of type 'commit'
87 # This could be a new file that was added in this commit
88 # If this wasn't a bare repo, we could run the following to get the list of new files in this commit:
89 # new_files = `git show #{tmp[0]} --name-only --diff-filter=A --pretty=""`.split("\n")
90 # Instead, we just look at all the files touched in the commit and check the diff to see
91 # if it is a new file. This could prove brittle...
92 files = `git show #{tmp[0]} --name-only --pretty=""`.split("\n")
# Fetch the commit's diff restricted to a single touched file.
# NOTE(review): `f` is interpolated into a shell command without quoting, so a
# path containing spaces or shell metacharacters would change the command that
# is run -- consider escaping it.
95 commit = `git show #{tmp[0]} -- #{f}`
96 # Only consider files, not symlinks (mode 120000)
# NOTE(review): `10755` looks like a typo for `100755` (git's mode for executable
# regular files); as written, newly added executable files never match and are
# therefore not checked here -- confirm and fix.
97 if commit =~ /^new file mode (100644|10755)\nindex 000000/
# Scan the diff text for the copyright line. On the first match the header
# starts from the line preceding it; the later branches are keyed on
# headerCount, but their bodies are not part of this excerpt.
102 commit.each_line do |line|
103 if ((headerCount == 0) and (line =~ /Copyright.*All rights reserved./))
104 header = previousLine
107 elsif ((headerCount > 0) and (headerCount < 3))
110 elsif (headerCount == 3)
119 broken = check_file(filename, header, broken)
123 # git object of type 'blob'
125 # test if this is a symlink.
126 # Get the tree for each revision we are considering, find the blob hash in there, check the mode at start of line.
127 # Stop looking at revisions once we have a match.
# `r` comes from a loop header missing from this excerpt (presumably iterating
# over `commits` -- confirm).
# NOTE(review): `git cat-file -p <tree>` prints only that tree's direct entries,
# so a blob living in a subdirectory would not be found by this lookup -- confirm.
130 tree = `git cat-file -p #{r}^{tree}`
131 if tree =~ /#{tmp[0]}/
# A tree entry whose mode is 120000 is a symbolic link.
132 if tree =~ /^120000.blob.#{tmp[0]}/
# Regular file: take the copyright line found within the blob's first 20 lines,
# together with one line before it and three lines after it.
139 header = `git show #{tmp[0]} | head -n20 | egrep -A3 -B1 'Copyright.*All rights reserved.'`
140 broken = check_file(filename, header, broken)
142 #puts "#{filename} is a symbolic link, skipping"
# Summary output. The conditions selecting FAIL or PASS, and any exit status,
# are not part of this excerpt.
149 puts "[POLICY] all files must contain copyright headers, for more information see"
151 puts " https://dev.arvados.org/projects/arvados/wiki/Coding_Standards#Copyright-headers"
153 puts "Enforcing copyright headers: FAIL"
157 puts "Enforcing copyright headers: PASS"
# Script entry point: run the copyright header check for the pushed ref.
161 check_copyright_headers