2 # Check the format of GNU Emacs change log entries.
4 # Copyright 2014-2019 Free Software Foundation, Inc.
6 # This file is part of GNU Emacs.
8 # GNU Emacs is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation, either version 3 of the License, or
11 # (at your option) any later version.
13 # GNU Emacs is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>.
21 # Written by Paul Eggert.
# Prefer gawk if available, as it handles NUL bytes properly.
# "type" succeeds only when the command can be found; its output is
# discarded because only the exit status matters here.
if type gawk >/dev/null 2>&1; then
  awk=gawk
else
  awk=awk
fi
# Use a UTF-8 locale if available, so that the UTF-8 check works.
# Use U+00A2 CENT SIGN to test whether the locale works.
cent_sign_utf8_format='\302\242\n'
cent_sign=`printf "$cent_sign_utf8_format"`
# If awk treats the cent sign as one character, substr(..., 2) yields "@";
# in a unibyte locale it yields the cent sign's second byte followed by "@".
print_at_sign='BEGIN {print substr("'$cent_sign'@", 2)}'
at_sign=`$awk "$print_at_sign" </dev/null 2>/dev/null`
if test "$at_sign" != @; then
  # The current locale failed the probe; retry under en_US.UTF-8.
  at_sign=`LC_ALL=en_US.UTF-8 $awk "$print_at_sign" </dev/null 2>/dev/null`
  if test "$at_sign" = @; then
    LC_ALL=en_US.UTF-8
  else
    # No working UTF-8 locale: fall back to the C locale so that the
    # unibyte regular expressions in the awk program behave predictably.
    LC_ALL=C
  fi
  export LC_ALL
fi
46 # Check the log entry.
# exec replaces this shell with awk.  The probe results (at_sign, cent_sign)
# and the first script argument (file) are passed in as awk variables.
47 exec $awk -v at_sign
="$at_sign" -v cent_sign
="$cent_sign" -v file="$1" '
49 # These regular expressions assume traditional Unix unibyte behavior.
50 # They are needed for old or broken versions of awk, e.g.,
51 # mawk 1.3.3 (1996), or gawk on MSYS (2015), and/or for systems that
52 # cannot use UTF-8 as the codeset for the locale.
# space matches one white-space byte; non_space matches any other byte.
53 space = "[ \f\n\r\t\v]"
54 non_space = "[^ \f\n\r\t\v]"
55 # The non_print below rejects control characters and surrogates
56 # UTF-8 for: 0x01-0x1f 0x7f 0x80-0x9f 0xd800-0xdbff 0xdc00-0xdfff
57 non_print = "[\1-\37\177]|\302[\200-\237]|\355[\240-\277][\200-\277]"
59 # Prefer POSIX regular expressions if available, as they do a
60 # better job of checking. Similarly, prefer POSIX negated
61 # expressions if UTF-8 also works.
62 if (" " ~ /[[:space:]]/) {
# at_sign is "@" only when the shell probe saw awk treat the cent sign as a
# single character; the second test checks that the character class
# [[:print:]] accepts the cent sign as exactly one printable character.
64 if (at_sign == "@" && cent_sign ~ /^[[:print:]]$/) {
65 non_space = "[^[:space:]]"
66 non_print = "[^[:print:]]"
# unsafe_gnu_url matches an http:// or ftp:// URL whose host is gnu.org or
# fsf.org, optionally preceded by a prefix of lower-case letters and dots
# that ends in a dot.
69 c_lower = "abcdefghijklmnopqrstuvwxyz"
70 unsafe_gnu_url = "(http|ftp)://([" c_lower ".]*\\.)?(gnu|fsf)\\.org"
76 # Ignore every line after a scissors line.
# The pattern accepts "#", then a run of two or more dashes on each side of
# one of the markers >8 >% 8< %<, with optional spaces in between.
77 if (/^# *---* *(>[8%]|[8%]<) *---* *$/) { exit }
79 # Ignore comment lines.
84 print "Invalid character (not UTF-8) in commit message"
# While nlines is still 0, skip any line that has no non-space character.
88 nlines == 0 && $0 !~ non_space { next }
93 # Ignore special markers used by "git rebase --autosquash".
# sub() returns the number of substitutions made, so the negated test is
# true only when no "fixup! " prefix was removed from the line.
94 if (! sub(/^fixup! /, ""))
98 print "White space at start of commit message'\''s first line"
103 nlines == 2 && $0 ~ non_space {
104 print "Nonempty second line in commit message"
109 # Expand tabs to spaces for length calculations etc.
# Each tab becomes enough spaces to reach the next multiple of 8 columns;
# the * in %*s takes the field width from the preceding argument, and the
# empty string is padded to that width.
110 while (match($0, /\t/)) {
111 before_tab = substr($0, 1, RSTART - 1)
112 after_tab = substr($0, RSTART + 1)
113 $0 = sprintf("%s%*s%s", before_tab, 8 - (RSTART - 1) % 8, "", after_tab)
# length without an argument is the length of the current line.  Only lines
# that contain white space are reported here.
117 78 < length && $0 ~ space {
118 print "Line longer than 78 characters in commit message"
123 print "Word longer than 140 characters in commit message"
128 print "'\''Signed-off-by:'\'' in commit message"
132 $0 ~ unsafe_gnu_url {
137 print "Unprintable character in commit message"
143 print "Empty commit message"
# Runs only when status is 0 and needs_rewriting is set.
# NOTE(review): the assignment of needs_rewriting is not visible in this
# excerpt; presumably the unsafe_gnu_url rule sets it - confirm.
146 if (status == 0 && needs_rewriting) {
147 for (i = 1; i <= NR; i++) {
149 while (match(line, unsafe_gnu_url)) {
150 prefix = substr(line, 1, RSTART - 1)
151 suffix = substr(line, RSTART)
# Replace the scheme with https:.  The suffix starts with either ftp:
# (4 characters) or http: (5 characters); the match result adds 1 in the
# http case, so substr resumes at the // in both cases.
152 line = prefix "https:" substr(suffix, 5 + (suffix ~ /^http:/))
# A nonzero result from close() is reported as a failure to rewrite file.
156 if (close(file) != 0) {
157 print "Cannot rewrite: " file
162 print "Commit aborted; please see the file 'CONTRIBUTE
'"