Automatic date update in version.in
[binutils-gdb.git] / etc / update-copyright.py
blob8f5d33f336fbe3a6f27d1b61630c2f98cf473c79
1 #!/usr/bin/env python3
3 # Copyright (C) 2013-2023 Free Software Foundation, Inc.
5 # This script is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 3, or (at your option)
8 # any later version.
10 # This script adjusts the copyright notices at the top of source files
11 # so that they have the form:
13 # Copyright (C) XXXX-YYYY Free Software Foundation, Inc.
15 # It doesn't change code that is known to be maintained elsewhere or
16 # that carries a non-FSF copyright.
18 # Pass --this-year to the script if you want it to add the current year
19 # to all applicable notices. Pass --quilt if you are using quilt and
20 # want files to be added to the quilt before being changed.
22 # By default the script will update all directories for which the
23 # output has been vetted. You can instead pass the names of individual
24 # directories, including those that haven't been approved. So:
26 # update-copyright.py --this-year
28 # is the command that would be used at the beginning of a year to update
29 # all copyright notices (and possibly at other times to check whether
30 # new files have been added with old years). On the other hand:
32 # update-copyright.py --this-year libiberty
34 # would run the script on just libiberty/.
36 # This script was copied from gcc's contrib/ and modified to suit
37 # binutils. In contrast to the gcc script, this one will update
38 # the testsuite and --version output strings too.
40 import os
41 import re
42 import sys
43 import time
44 import subprocess
class Errors:
    """Counts the problems found during a run and echoes each to stderr."""

    def __init__(self):
        # Number of calls made to report() so far.
        self.num_errors = 0

    def report(self, filename, string):
        """Write one diagnostic line to stderr and bump the error count.

        When FILENAME is truthy the message is prefixed with it and ': ';
        otherwise STRING is printed on its own.
        """
        message = filename + ': ' + string if filename else string
        sys.stderr.write(message + '\n')
        self.num_errors += 1

    def ok(self):
        """Return True while nothing has been reported."""
        return not self.num_errors
class GenericFilter:
    """Default policy for which files and directories get processed.

    Subclasses adjust the skip_* sets or override the methods to give a
    directory its own rules.
    """

    def __init__(self):
        self.skip_files = {
            # Skip licence files.
            'COPYING',
            'COPYING.LIB',
            'COPYING3',
            'COPYING3.LIB',
            'COPYING.LIBGLOSS',
            'COPYING.NEWLIB',
            'LICENSE',
            'fdl.texi',
            'gpl_v3.texi',
            'fdl-1.3.xml',
            'gpl-3.0.xml',

            # Skip auto- and libtool-related files
            'aclocal.m4',
            'compile',
            'config.guess',
            'config.sub',
            'depcomp',
            'install-sh',
            'libtool.m4',
            'ltmain.sh',
            'ltoptions.m4',
            'ltsugar.m4',
            'ltversion.m4',
            'lt~obsolete.m4',
            'missing',
            'mkdep',
            'mkinstalldirs',
            'move-if-change',
            'shlibpath.m4',
            'symlink-tree',
            'ylwrap',

            # Skip FSF mission statement, etc.
            'gnu.texi',
            'funding.texi',
            'appendix_free.xml',

            # Skip imported texinfo files.
            'texinfo.tex',
        }

        self.skip_dirs = {
            'autom4te.cache',
        }

        self.skip_extensions = {
            '.png',
            '.pyc',

            # Maintained by the translation project.
            '.po',

            # Automatically-generated.
            '.pot',
        }

        # Both start out empty; is_fossilised_file() and by_package_author()
        # consult them.
        self.fossilised_files = set()
        self.own_files = set()

    def get_line_filter(self, dir, filename):
        """Return a regexp matching lines to leave untouched, or None."""
        if not filename.startswith('ChangeLog'):
            return None
        # Ignore references to copyright in changelog entries, which are
        # the tab-indented lines.
        return re.compile('\t')

    def skip_file(self, dir, filename):
        """Return True if FILENAME in DIR must not be processed at all."""
        if filename in self.skip_files:
            return True

        stem, suffix = os.path.splitext(os.path.join(dir, filename))
        if suffix in self.skip_extensions:
            return True

        if suffix == '.in':
            # Skip .in files produced by automake.
            if os.path.exists(stem + '.am'):
                return True

            # Skip files produced by autogen, which needs both a .def and
            # a .tpl alongside.
            if all(os.path.exists(stem + ext) for ext in ('.def', '.tpl')):
                return True

        # Skip configure files produced by autoconf
        if filename == 'configure':
            if any(os.path.exists(stem + ext) for ext in ('.ac', '.in')):
                return True

        return False

    def skip_dir(self, dir, subdir):
        """Return True if SUBDIR of DIR must not be descended into."""
        return subdir in self.skip_dirs

    def is_fossilised_file(self, dir, filename):
        """Return True if FILENAME's notices must keep their old end year."""
        if filename in self.fossilised_files:
            return True
        # Only touch current ChangeLogs.
        return filename != 'ChangeLog' and 'ChangeLog' in filename

    def by_package_author(self, dir, filename):
        """Return True if FILENAME is listed in own_files."""
        return filename in self.own_files
class Copyright:
    """Finds copyright notices in files and rewrites them canonically.

    A notice is rewritten as '<intro> (C) <years> <holder>' where the
    years are collapsed to a single year or a 'first-last' range and the
    holder is replaced by the canonical spelling registered through
    add_package_author().  Notices whose holder was registered through
    add_external_author() are left alone; anything else is reported via
    the errors object.
    """

    def __init__(self, errors):
        """Compile the matching regexps and set the default options.

        ERRORS receives diagnostics through report (filename, string) and
        is asked ok () before any file is written back.
        """
        self.errors = errors

        # Characters in a range of years.  Include '.' for typos.
        ranges = r'[0-9](?:[-0-9.,\s]|\s+and\s+)*[0-9]'

        # Non-whitespace characters in a copyright holder's name.
        name = r'[\w.,-]'

        # Matches one year.
        self.year_re = re.compile('[0-9]+')

        # Matches part of a year or copyright holder.
        self.continuation_re = re.compile(ranges + '|' + name)

        # Matches a full copyright notice.  All fragments are raw strings
        # so that the regexp escapes are not treated as (invalid) string
        # escapes by the Python parser.
        self.copyright_re = re.compile(
            # 1: 'Copyright (C)', etc.
            # NOTE(review): the '©' alternative may be spelled as an HTML
            # entity in other copies of this script -- confirm the literal.
            r'([Cc]opyright'
            r'|[Cc]opyright\s+\([Cc]\)'
            r'|[Cc]opyright\s+%s'
            r'|[Cc]opyright\s+©'
            r'|[Cc]opyright\s+@copyright{}'
            r'|@set\s+copyright[\w-]+)'

            # 2: the years.  Include the whitespace in the year, so that
            # we can remove any excess.
            r'(\s*(?:' + ranges + r',?'
            r'|@value\{[^{}]*\})\s*)'

            # 3: 'by ', if used
            r'(by\s+)?'

            # 4: the copyright holder.  Don't allow multiple consecutive
            # spaces, so that right-margin gloss doesn't get caught
            # (e.g. gnat_ugn.texi).
            r'(' + name + r'(?:\s?' + name + r')*)?')

        # A regexp for notices that might have slipped by.  Just matching
        # 'copyright' is too noisy, and 'copyright.*[0-9]' falls foul of
        # HTML header markers, so check for 'copyright' and two digits.
        self.other_copyright_re = re.compile(
            r'(^|[^\._])copyright[^=]*[0-9][0-9]', re.IGNORECASE)

        # Comment leaders that may start a continuation line.
        self.comment_re = re.compile('#+|[*]+|;+|%+|//+|@c |dnl ')

        # Maps each known holder spelling to its canonical form, or to
        # None for holders whose notices must not be changed.
        self.holders = {'@copying': '@copying'}

        # Every proper prefix (cut at a space) of a package author's name;
        # a holder equal to one of these probably continues on the next
        # line.
        self.holder_prefixes = set()

        # True to 'quilt add' files before changing them.
        self.use_quilt = False

        # If set, force all notices to include this year.
        self.max_year = None

        # Goes after the year(s).  Could be ', '.
        self.separator = ' '

    def add_package_author(self, holder, canon_form=None):
        """Register HOLDER as a spelling of the package author.

        Notices naming HOLDER are rewritten to use CANON_FORM, which
        defaults to HOLDER itself.
        """
        if not canon_form:
            canon_form = holder
        self.holders[holder] = canon_form

        # Record 'A', 'A B', ... for a holder 'A B C' so that a notice cut
        # off part-way through the name is recognised as incomplete.
        index = holder.find(' ')
        while index >= 0:
            self.holder_prefixes.add(holder[:index])
            index = holder.find(' ', index + 1)

    def add_external_author(self, holder):
        """Register HOLDER as a known holder whose notices are left alone."""
        self.holders[holder] = None

    class BadYear(Exception):
        """Raised by parse_year() for a year it cannot interpret."""

        def __init__(self, year):
            self.year = year

        def __str__(self):
            return 'unrecognised year: ' + self.year

    def parse_year(self, string):
        """Convert the digit string STRING to a four-digit year.

        Two-digit years above 70 are taken to be 19xx.  Four-digit years
        are returned unchanged.  Anything else raises BadYear.
        """
        year = int(string)
        if len(string) == 2:
            if year > 70:
                return year + 1900
        elif len(string) == 4:
            return year
        raise self.BadYear(string)

    def year_range(self, years):
        """Return (earliest, latest) of the years mentioned in YEARS."""
        year_list = [self.parse_year(year)
                     for year in self.year_re.findall(years)]
        assert len(year_list) > 0
        return (min(year_list), max(year_list))

    def set_use_quilt(self, use_quilt):
        """Choose whether changed files are 'quilt add'ed first."""
        self.use_quilt = use_quilt

    def include_year(self, year):
        """Force every updated notice to extend to YEAR.  Call at most once."""
        assert not self.max_year
        self.max_year = year

    def canonicalise_years(self, dir, filename, filter, years):
        """Return the canonical spelling of the year list YEARS.

        FILTER is only consulted (through is_fossilised_file) when a
        maximum year has been set with include_year().  Raises BadYear if
        any year is malformed.
        """
        # Leave texinfo variables alone.
        if years.startswith('@value'):
            return years

        (min_year, max_year) = self.year_range(years)

        # Update the upper bound, if enabled.
        if self.max_year and not filter.is_fossilised_file(dir, filename):
            max_year = max(max_year, self.max_year)

        # Use a range.
        if min_year == max_year:
            return '%d' % min_year
        else:
            return '%d-%d' % (min_year, max_year)

    def strip_continuation(self, line):
        """Return LINE without leading whitespace and one comment leader."""
        line = line.lstrip()
        match = self.comment_re.match(line)
        if match:
            line = line[match.end():].lstrip()
        return line

    def is_complete(self, match):
        """Return a true value if MATCH appears to hold the whole holder.

        The notice is incomplete when no holder was matched, or when the
        holder is only a prefix of a package author's name.
        """
        holder = match.group(4)
        return (holder
                and (holder not in self.holder_prefixes
                     or holder in self.holders))

    def update_copyright(self, dir, filename, filter, file, line, match):
        """Canonicalise the notice that MATCH found in LINE.

        FILE is the open file LINE came from; further lines are read from
        it while the notice looks incomplete.  Returns a tuple
        (changed, text, next_line): TEXT is what should be stored in
        place of the line(s) consumed, and NEXT_LINE is a line that was
        read ahead but not consumed (None or '' if there is none).
        """
        orig_line = line
        next_line = None
        pathname = os.path.join(dir, filename)

        intro = match.group(1)
        if intro.startswith('@set'):
            # Texinfo year variables should always be on one line
            after_years = line[match.end(2):].strip()
            if after_years != '':
                self.errors.report(pathname,
                                   'trailing characters in @set: '
                                   + after_years)
                return (False, orig_line, next_line)
        else:
            # If it looks like the copyright is incomplete, add the next line.
            while not self.is_complete(match):
                # readline() returns '' at end of file rather than raising;
                # '' never matches continuation_re, so the loop ends there.
                next_line = file.readline()

                # If the next line doesn't look like a proper continuation,
                # assume that what we've got is complete.
                continuation = self.strip_continuation(next_line)
                if not self.continuation_re.match(continuation):
                    break

                # Merge the lines for matching purposes.
                orig_line += next_line
                line = line.rstrip() + ' ' + continuation
                next_line = None

                # Rematch with the longer line, at the original position.
                match = self.copyright_re.match(line, match.start())
                assert match

            holder = match.group(4)

            # Use the filter to test cases where markup is getting in the way.
            if filter.by_package_author(dir, filename):
                assert holder not in self.holders

            elif not holder:
                self.errors.report(pathname, 'missing copyright holder')
                return (False, orig_line, next_line)

            elif holder not in self.holders:
                self.errors.report(pathname,
                                   'unrecognised copyright holder: ' + holder)
                return (False, orig_line, next_line)

            else:
                # See whether the copyright is associated with the package
                # author.
                canon_form = self.holders[holder]
                if not canon_form:
                    return (False, orig_line, next_line)

                # Make sure the author is given in a consistent way.
                # Edits are applied right-to-left (holder, 'by', years,
                # intro) so that the earlier match offsets stay valid.
                line = (line[:match.start(4)]
                        + canon_form
                        + line[match.end(4):])

                # Remove any 'by'
                if match.group(3):
                    line = line[:match.start(3)] + line[match.end(3):]

        # Update the copyright years.
        years = match.group(2).strip()
        if (self.max_year
            and match.start(0) > 0 and line[match.start(0) - 1] == '"'
            and not filter.is_fossilised_file(dir, filename)):
            # A printed copyright date consists of the current year
            canon_form = '%d' % self.max_year
        else:
            try:
                canon_form = self.canonicalise_years(dir, filename, filter,
                                                     years)
            except self.BadYear as e:
                self.errors.report(pathname, str(e))
                return (False, orig_line, next_line)

        line = (line[:match.start(2)]
                + ' ' + canon_form + self.separator
                + line[match.end(2):])

        # Use the standard (C) form.
        if intro.endswith('right'):
            intro += ' (C)'
        elif intro.endswith('(c)'):
            intro = intro[:-3] + '(C)'
        line = line[:match.start(1)] + intro + line[match.end(1):]

        # Strip trailing whitespace
        line = line.rstrip() + '\n'

        return (line != orig_line, line, next_line)

    def guess_encoding(self, pathname):
        """Return the first of 'utf8', 'iso8859' that decodes PATHNAME.

        Returns None if neither does.
        """
        for encoding in ('utf8', 'iso8859'):
            try:
                # Use a context manager so the probe handle is closed
                # promptly instead of being left to the garbage collector.
                with open(pathname, 'r', encoding=encoding) as file:
                    file.read()
            except UnicodeDecodeError:
                continue
            return encoding
        return None

    def process_file(self, dir, filename, filter):
        """Update the notices in DIR/FILENAME, rewriting it if needed.

        The file is only rewritten when some notice changed and no error
        has been reported so far; the new contents go to '<path>.tmp'
        first and are then renamed over the original.
        """
        pathname = os.path.join(dir, filename)
        if filename.endswith('.tmp'):
            # Looks like something we tried to create before.
            try:
                os.remove(pathname)
            except OSError:
                pass
            return

        lines = []
        changed = False
        line_filter = filter.get_line_filter(dir, filename)
        mode = None
        encoding = self.guess_encoding(pathname)
        with open(pathname, 'r', encoding=encoding) as file:
            mode = os.fstat(file.fileno()).st_mode
            for line in file:
                # update_copyright() may hand back a line it read ahead
                # but did not consume; process that before reading on.
                while line:
                    next_line = None
                    # Leave filtered-out lines alone.
                    if not (line_filter and line_filter.match(line)):
                        match = self.copyright_re.search(line)
                        if match:
                            res = self.update_copyright(dir, filename, filter,
                                                        file, line, match)
                            (this_changed, line, next_line) = res
                            changed = changed or this_changed

                        # Check for copyright lines that might have slipped by.
                        elif self.other_copyright_re.search(line):
                            self.errors.report(pathname,
                                               'unrecognised copyright: %s'
                                               % line.strip())
                    lines.append(line)
                    line = next_line

        # If something changed, write the new file out.
        if changed and self.errors.ok():
            tmp_pathname = pathname + '.tmp'
            with open(tmp_pathname, 'w', encoding=encoding) as file:
                file.writelines(lines)
                # Carry the original permission bits over to the new file.
                os.fchmod(file.fileno(), mode)
            if self.use_quilt:
                subprocess.call(['quilt', 'add', pathname])
            os.rename(tmp_pathname, pathname)

    def process_tree(self, tree, filter):
        """Process every file under TREE that FILTER does not skip."""
        for (dir, subdirs, filenames) in os.walk(tree):
            # Don't recurse through directories that should be skipped.
            # os.walk only honours in-place changes to the list.
            subdirs[:] = [subdir for subdir in subdirs
                          if not filter.skip_dir(dir, subdir)]

            # Handle the files in this directory.
            for filename in filenames:
                if filter.skip_file(dir, filename):
                    sys.stdout.write('Skipping %s\n'
                                     % os.path.join(dir, filename))
                else:
                    self.process_file(dir, filename, filter)
class CmdLine:
    """Command-line driver: parses sys.argv and runs the updater.

    Subclasses register the directories they know about with add_dir()
    and may set default_dirs to the ones processed when none is named on
    the command line.
    """

    def __init__(self, copyright=Copyright):
        """Create the error sink, the updater and the standard options.

        COPYRIGHT is the class (or other callable taking the errors
        object) used to build the updater.
        """
        self.errors = Errors()
        self.copyright = copyright(self.errors)
        # (directory, filter) pairs in registration order.
        self.dirs = []
        # Directories processed when the command line names none.
        self.default_dirs = []
        # Directories named on the command line.
        self.chosen_dirs = []
        # Maps option name to its handler; option_help keeps the
        # (name, help) pairs in registration order for --help.
        self.option_handlers = dict()
        self.option_help = []

        self.add_option('--help', 'Print this help', self.o_help)
        self.add_option('--quilt', '"quilt add" files before changing them',
                        self.o_quilt)
        self.add_option('--this-year', 'Add the current year to every notice',
                        self.o_this_year)

    def add_option(self, name, help, handler):
        """Register option NAME; HANDLER is called with the option string."""
        self.option_help.append((name, help))
        self.option_handlers[name] = handler

    def add_dir(self, dir, filter=None):
        """Register DIR as a known directory, processed with FILTER.

        When FILTER is omitted a fresh GenericFilter is created for this
        directory, rather than sharing one instance built when the method
        was defined.
        """
        if filter is None:
            filter = GenericFilter()
        self.dirs.append((dir, filter))

    def o_help(self, option=None):
        """Print the usage, options and known directories, then exit 0."""
        sys.stdout.write('Usage: %s [options] dir1 dir2...\n\n'
                         'Options:\n' % sys.argv[0])
        option_format = '%-15s %s\n'
        for (what, help) in self.option_help:
            sys.stdout.write(option_format % (what, help))
        sys.stdout.write('\nDirectories:\n')

        # Lay the directory names out three to a row.
        column_format = '%-25s'
        i = 0
        for (dir, filter) in self.dirs:
            i += 1
            if i % 3 == 0 or i == len(self.dirs):
                sys.stdout.write(dir + '\n')
            else:
                sys.stdout.write(column_format % dir)
        sys.exit(0)

    def o_quilt(self, option):
        """Handler for --quilt."""
        self.copyright.set_use_quilt(True)

    def o_this_year(self, option):
        """Handler for --this-year: extend notices to the current year."""
        self.copyright.include_year(time.localtime().tm_year)

    def main(self):
        """Process sys.argv and exit with status 0 on success, 1 on error."""
        for arg in sys.argv[1:]:
            if arg[:1] != '-':
                self.chosen_dirs.append(arg)
            elif arg in self.option_handlers:
                self.option_handlers[arg](arg)
            else:
                self.errors.report(None, 'unrecognised option: ' + arg)
        if self.errors.ok():
            if len(self.chosen_dirs) == 0:
                self.chosen_dirs = self.default_dirs
            if len(self.chosen_dirs) == 0:
                self.o_help()
            else:
                for chosen_dir in self.chosen_dirs:
                    # Compare with a trailing separator so that 'gdb'
                    # does not also select 'gdbserver'.
                    canon_dir = os.path.join(chosen_dir, '')
                    count = 0
                    for (dir, filter) in self.dirs:
                        if (dir + os.sep).startswith(canon_dir):
                            count += 1
                            self.copyright.process_tree(dir, filter)
                    if count == 0:
                        self.errors.report(None, 'unrecognised directory: '
                                           + chosen_dir)
        sys.exit(0 if self.errors.ok() else 1)
546 #----------------------------------------------------------------------------
class TopLevelFilter(GenericFilter):
    """Filter that never descends into any subdirectory."""

    def skip_dir(self, dir, subdir):
        """Report every SUBDIR as skipped, whatever its name."""
        return True
class ConfigFilter(GenericFilter):
    """Generic filter that additionally skips .m4 files from gettext."""

    def __init__(self):
        super().__init__()

    def skip_file(self, dir, filename):
        """Return True for gettext-imported .m4 files and generic skips."""
        if filename.endswith('.m4'):
            with open(os.path.join(dir, filename)) as m4_file:
                first_line = m4_file.readline()
            # Skip files imported from gettext, recognised by 'gettext-'
            # appearing in their first line.
            if 'gettext-' in first_line:
                return True
        return super().skip_file(dir, filename)
class LdFilter(GenericFilter):
    """Generic filter that additionally skips '.ro' files."""

    def __init__(self):
        super().__init__()

        # ld testsuite output match files.
        self.skip_extensions.add('.ro')
class BinutilsCopyright(Copyright):
    """Copyright updater preloaded with the holders seen in binutils."""

    def __init__(self, errors):
        super().__init__(errors)

        # Every spelling of the FSF below is rewritten to this one.
        canon_fsf = 'Free Software Foundation, Inc.'
        fsf_spellings = (
            'Free Software Foundation',
            'Free Software Foundation.',
            'Free Software Foundation Inc.',
            'Free Software Foundation, Inc',
            'Free Software Foundation, Inc.',
            'The Free Software Foundation',
            'The Free Software Foundation, Inc.',
            'Software Foundation, Inc.',
        )
        for spelling in fsf_spellings:
            self.add_package_author(spelling, canon_fsf)

        # Known third-party holders; their notices are left untouched.
        external_holders = (
            'Carnegie Mellon University',
            'John D. Polstra.',
            'Innovative Computing Labs',
            'Linaro Ltd.',
            'MIPS Computer Systems, Inc.',
            'Red Hat Inc.',
            'Regents of the University of California.',
            'The Regents of the University of California.',
            'Third Eye Software, Inc.',
            'Ulrich Drepper',
            'Synopsys Inc.',
        )
        for holder in external_holders:
            self.add_external_author(holder)
class BinutilsCmdLine(CmdLine):
    """Command line for binutils-gdb: known directories and defaults."""

    def __init__(self):
        super().__init__(BinutilsCopyright)

        # Argument tuples for add_dir(), in registration order.  Entries
        # with a single element rely on add_dir()'s default filter.
        registrations = (
            ('.', TopLevelFilter()),
            ('bfd',),
            ('binutils',),
            ('config', ConfigFilter()),
            ('cpu',),
            ('elfcpp',),
            ('etc',),
            ('gas',),
            ('gdb',),
            ('gdbserver',),
            ('gdbsupport',),
            ('gold',),
            ('gprof',),
            ('gprofng',),
            ('include',),
            ('ld', LdFilter()),
            ('libbacktrace',),
            ('libctf',),
            ('libdecnumber',),
            ('libiberty',),
            ('libsframe',),
            ('opcodes',),
            ('readline',),
            ('sim',),
        )
        for registration in registrations:
            self.add_dir(*registration)

        # Directories updated when none is named on the command line.
        self.default_dirs = [
            'bfd',
            'binutils',
            'elfcpp',
            'etc',
            'gas',
            'gold',
            'gprof',
            'include',
            'ld',
            'libctf',
            'libiberty',
            'libsframe',
            'opcodes',
        ]
# Script entry point: build the binutils command line and run it.  main()
# reads sys.argv and always finishes by calling sys.exit().
BinutilsCmdLine().main()