unistr/u{8,16,32}-uctomb: Avoid possible trouble with huge strings.
[gnulib.git] / build-aux / update-copyright
blobd9b7f683a0887f281a43f1634548fd8a64ac975b
1 #!/bin/sh
2 #! -*-perl-*-
4 # Update an FSF copyright year list to include the current year.
6 # Copyright (C) 2009-2020 Free Software Foundation, Inc.
8 # This program is free software: you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 3, or (at your option)
11 # any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program. If not, see <https://www.gnu.org/licenses/>.
21 # Written by Jim Meyering and Joel E. Denny
23 # This script updates an FSF copyright year list to include the current year.
24 # Usage: update-copyright [FILE...]
26 # The arguments to this script should be names of files that contain
27 # copyright statements to be updated. The copyright holder's name
28 # defaults to "Free Software Foundation, Inc." but may be changed to
29 # any other name by using the "UPDATE_COPYRIGHT_HOLDER" environment
30 # variable.
32 # For example, you might wish to use the update-copyright target rule
33 # in maint.mk from gnulib's maintainer-makefile module.
35 # Iff a copyright statement is recognized in a file and the final
36 # year is not the current year, then the statement is updated for the
37 # new year and it is reformatted to:
39 # 1. Fit within 72 columns.
40 # 2. Convert 2-digit years to 4-digit years by prepending "19".
41 # 3. Expand copyright year intervals. (See "Environment variables"
42 # below.)
44 # A warning is printed for every file for which no copyright
45 # statement is recognized.
47 # Each file's copyright statement must be formatted correctly in
48 # order to be recognized. For example, each of these is fine:
50 # Copyright @copyright{} 1990-2005, 2007-2009 Free Software
51 # Foundation, Inc.
53 # # Copyright (C) 1990-2005, 2007-2009 Free Software
54 # # Foundation, Inc.
56 # /*
57 # * Copyright &copy; 90,2005,2007-2009
58 # * Free Software Foundation, Inc.
59 # */
61 # However, the following format is not recognized because the line
62 # prefix changes after the first line:
64 # ## Copyright (C) 1990-2005, 2007-2009 Free Software
65 # # Foundation, Inc.
67 # However, any correctly formatted copyright statement following
68 # a non-matching copyright statement would be recognized.
70 # The exact conditions that a file's copyright statement must meet
71 # to be recognized are:
73 # 1. It is the first copyright statement that meets all of the
74 # following conditions. Subsequent copyright statements are
75 # ignored.
76 # 2. Its format is "Copyright (C)", then a list of copyright years,
77 # and then the name of the copyright holder.
78 # 3. The "(C)" takes one of the following forms or is omitted
79 # entirely:
81 # A. (C)
82 # B. (c)
83 # C. @copyright{}
84 # D. &copy;
85 # E. ©
87 # 4. The "Copyright" appears at the beginning of a line, except that it
88 # may be prefixed by any sequence (e.g., a comment) of no more than
89 # 5 characters -- including white space.
90 # 5. Iff such a prefix is present, the same prefix appears at the
91 # beginning of each remaining line within the FSF copyright
92 # statement. There is one exception in order to support C-style
93 # comments: if the first line's prefix contains nothing but
94 # whitespace surrounding a "/*", then the prefix for all subsequent
95 # lines is the same as the first line's prefix except with each of
96 # "/" and possibly "*" replaced by a " ". The replacement of "*"
97 # by " " is consistent throughout all subsequent lines.
98 # 6. Blank lines, even if preceded by the prefix, do not appear
99 # within the FSF copyright statement.
100 # 7. Each copyright year is 2 or 4 digits, and years are separated by
101 # commas, "-", or "--". Whitespace may appear after commas.
103 # Environment variables:
105 # 1. If UPDATE_COPYRIGHT_FORCE=1, a recognized FSF copyright statement
106 # is reformatted even if it does not need updating for the new
107 # year. If unset or set to 0, only updated FSF copyright
108 # statements are reformatted.
109 # 2. If UPDATE_COPYRIGHT_USE_INTERVALS=1, every series of consecutive
110 # copyright years (such as 90, 1991, 1992-2007, 2008) in a
111 # reformatted FSF copyright statement is collapsed to a single
112 # interval (such as 1990-2008). If unset or set to 0, all existing
113 # copyright year intervals in a reformatted FSF copyright statement
114 # are expanded instead.
115 # If UPDATE_COPYRIGHT_USE_INTERVALS=2, convert a sequence with gaps
116 # to the minimal containing range. For example, convert
117 # 2000, 2004-2007, 2009 to 2000-2009.
118 # 3. For testing purposes, you can set the assumed current year in
119 # UPDATE_COPYRIGHT_YEAR.
120 # 4. The default maximum line length for a copyright line is 72.
121 # Set UPDATE_COPYRIGHT_MAX_LINE_LENGTH to use a different length.
122 # 5. Set UPDATE_COPYRIGHT_HOLDER if the copyright holder is other
123 # than "Free Software Foundation, Inc.".
125 # This is a prologue that allows running a perl script as an executable
126 # on systems that are compliant with a POSIX version before POSIX:2017.
127 # On such systems, the usual invocation of an executable through execlp()
128 # or execvp() fails with ENOEXEC if it is a script that does not start
129 # with a #! line. The script interpreter mentioned in the #! line has
130 # to be /bin/sh, because on GuixSD systems that is the only program that
131 # has a fixed file name. The second line is essential for perl and is
132 # also useful for editing this file in Emacs. The next two lines below
133 # are valid code in both sh and perl. When executed by sh, they re-execute
134 # the script through the perl program found in $PATH. The '-x' option
135 # is essential as well; without it, perl would re-execute the script
136 # through /bin/sh. When executed by perl, the next two lines are a no-op.
eval 'exec perl -wSx -0777 -pi "$0" "$@"'
     if 0;

my $VERSION = '2020-04-04.15:07'; # UTC
# The definition above must lie within the first time-stamp-line-limit
# lines (see the local variables at the end of this file) in order
# for the Emacs time-stamp write hook (at end) to update it.
# If you change this file with Emacs, please let the write hook
# do its job.  Otherwise, update this string manually.

use strict;
use warnings;

# This body is run once per FILE argument by the "perl -0777 -pi"
# invocation above: the whole file is in $_, its name is in $ARGV, and
# whatever is left in $_ at the end is written back in place.

# Building blocks of the pattern that recognizes a copyright statement.
my $copyright_re = 'Copyright';
my $circle_c_re = '(?:\([cC]\)|@copyright\{}|\\\\\(co|&copy;|©)';
my $holder = $ENV{UPDATE_COPYRIGHT_HOLDER};
$holder ||= 'Free Software Foundation, Inc.';
# Longest line prefix (e.g., a comment leader) allowed before "Copyright".
my $prefix_max = 5;
# Maximum line length of the reformatted statement; anything that is
# not a positive decimal integer falls back to 72.
my $margin = $ENV{UPDATE_COPYRIGHT_MAX_LINE_LENGTH};
!$margin || $margin !~ m/^\d+$/
  and $margin = 72;

# Display width assumed for a tab in the prefix when wrapping.
my $tab_width = 8;

# UPDATE_COPYRIGHT_YEAR overrides the current year only if it is a
# 4-digit number; otherwise use the local clock.
my $this_year = $ENV{UPDATE_COPYRIGHT_YEAR};
if (!$this_year || $this_year !~ m/^\d{4}$/)
  {
    my ($sec, $min, $hour, $mday, $month, $year) = localtime (time ());
    $this_year = $year + 1900;
  }

# Unless the file consistently uses "\r\n" as the EOL, use "\n" instead.
my $eol = /(?:^|[^\r])\n/ ? "\n" : "\r\n";

# Set by the search loop below:
#   $leading  the preceding newline (if any) plus the first line's prefix;
#   $prefix   the prefix expected on the statement's continuation lines;
#   $ws_re    pattern for one whitespace gap, possibly spanning a line
#             break followed by $prefix;
#   $stmt_re  pattern matching the whole recognized statement; it stays
#             undefined when no statement is recognized.
my $leading;
my $prefix;
my $ws_re;
my $stmt_re;

# Try each "Copyright" that starts a line (after at most $prefix_max
# characters) until one is followed by a well-formed year list and
# the holder's name.
while (/(^|\n)(.{0,$prefix_max})$copyright_re/g)
  {
    $leading = "$1$2";
    $prefix = $2;
    if ($prefix =~ /^(\s*\/)\*(\s*)$/)
      {
        # C-style comment opener: on continuation lines the "/" becomes
        # a space, and the "*" does too if the line following the
        # comment's first line starts with the all-whitespace prefix.
        $prefix =~ s,/, ,;
        my $prefix_ws = $prefix;
        $prefix_ws =~ s/\*/ /; # Only whitespace.
        if (/\G(?:[^*\n]|\*[^\/\n])*\*?\n$prefix_ws/)
          {
            $prefix = $prefix_ws;
          }
      }
    $ws_re = '[ \t\r\f]'; # \s without \n
    $ws_re =
      "(?:$ws_re*(?:$ws_re|\\n" . quotemeta($prefix) . ")$ws_re*)";
    # The holder is a name, not a pattern: quote each word so that
    # characters such as "." or "(" match only themselves, and let each
    # whitespace character of the name match one whitespace gap.
    my $holder_re =
      join ($ws_re, map { quotemeta ($_) } split (/\s/, $holder, -1));
    my $stmt_remainder_re =
      "(?:$ws_re$circle_c_re)?"
      . "$ws_re(?:(?:\\d\\d)?\\d\\d(?:,$ws_re?|--?))*"
      . "((?:\\d\\d)?\\d\\d)$ws_re$holder_re";
    if (/\G$stmt_remainder_re/)
      {
        # In $stmt_re, group 1 is the statement without its leading
        # prefix and group 2 is the final year as written.
        $stmt_re =
          quotemeta($leading) . "($copyright_re$stmt_remainder_re)";
        last;
      }
  }
if (defined $stmt_re)
  {
    /$stmt_re/ or die; # Should never die.
    my $stmt = $1;
    my $final_year_orig = $2;

    # Handle two-digit year numbers like "98" and "99".
    my $final_year = $final_year_orig;
    $final_year <= 99
      and $final_year += 1900;

    if ($final_year != $this_year)
      {
        # Update the year.
        $stmt =~ s/\b$final_year_orig\b/$final_year, $this_year/;
      }
    if ($final_year != $this_year || $ENV{'UPDATE_COPYRIGHT_FORCE'})
      {
        # Normalize all whitespace including newline-prefix sequences.
        $stmt =~ s/$ws_re/ /g;

        # Put spaces after commas.
        $stmt =~ s/, ?/, /g;

        # Convert 2-digit to 4-digit years.
        $stmt =~ s/(\b\d\d\b)/19$1/g;

        # Make the use of intervals consistent.
        if (!$ENV{UPDATE_COPYRIGHT_USE_INTERVALS})
          {
            # Expand every interval into an explicit list of years.
            $stmt =~ s/(\d{4})--?(\d{4})/join(', ', $1..$2)/eg;
          }
        else
          {
            # This tests for .tex, .texi and .texinfo file names, which
            # get "--" as the interval dash; everything else gets "-".
            my $ndash = $ARGV =~ /\.tex(i(nfo)?)?$/ ? "--" : "-";

            # Collapse each run of consecutive years into one interval.
            # After an existing dash any year is accepted; after a
            # comma only the successor of the previous year is.
            $stmt =~
              s/
                (\d{4})
                (?:
                  (,\ |--?)
                  ((??{
                    if    ($2 ne ', ') { '\d{4}'; }
                    elsif (!$3)        { $1 + 1;  }
                    else               { $3 + 1;  }
                  }))
                )+
              /$1$ndash$3/gx;

            # When it's 2, emit a single range encompassing all year numbers.
            $ENV{UPDATE_COPYRIGHT_USE_INTERVALS} == 2
              and $stmt =~ s/\b(\d{4})\b.*\b(\d{4})\b/$1$ndash$2/;
          }

        # Format within margin.
        my $stmt_wrapped;
        my $text_margin = $margin - length($prefix);
        if ($prefix =~ /^(\t+)/)
          {
            # Each leading tab was counted as one column above; charge
            # the rest of its assumed display width.
            $text_margin -= length($1) * ($tab_width - 1);
          }
        # A margin no wider than the prefix would produce a malformed
        # ".{1,N}" quantifier below (N being zero or negative), which
        # perl may warn about or reject.  Allow at least one column;
        # longer words still go through the \S+ fallback, so the
        # output is the same.
        $text_margin < 1
          and $text_margin = 1;
        while (length $stmt)
          {
            # Take as many words as fit; failing that, take one whole
            # word even though it exceeds the margin.
            if (($stmt =~ s/^(.{1,$text_margin})(?: |$)//)
                || ($stmt =~ s/^([\S]+)(?: |$)//))
              {
                my $line = $1;
                $stmt_wrapped .= $stmt_wrapped ? "$eol$prefix" : $leading;
                $stmt_wrapped .= $line;
              }
            else
              {
                # Should be unreachable, but we don't want an infinite
                # loop if it can be reached.
                die;
              }
          }

        # Replace the old copyright statement.
        s/$stmt_re/$stmt_wrapped/;
      }
  }
else
  {
    print STDERR "$ARGV: warning: copyright statement not found\n";
  }
291 # Hey Emacs!
292 # Local variables:
293 # coding: utf-8
294 # mode: perl
295 # indent-tabs-mode: nil
296 # eval: (add-hook 'before-save-hook 'time-stamp)
297 # time-stamp-line-limit: 200
298 # time-stamp-start: "my $VERSION = '"
299 # time-stamp-format: "%:y-%02m-%02d.%02H:%02M"
300 # time-stamp-time-zone: "UTC0"
301 # time-stamp-end: "'; # UTC"
302 # End: