PR target/79932
[official-gcc.git] / contrib / update-copyright.py
blobf9852a8544f835fe3f980772d93972b77046b4db
1 #!/usr/bin/python
3 # Copyright (C) 2013-2017 Free Software Foundation, Inc.
5 # This script is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 3, or (at your option)
8 # any later version.
10 # This script adjusts the copyright notices at the top of source files
11 # so that they have the form:
13 # Copyright XXXX-YYYY Free Software Foundation, Inc.
15 # It doesn't change code that is known to be maintained elsewhere or
16 # that carries a non-FSF copyright.
18 # The script also doesn't change testsuite files, except those in
19 # libstdc++-v3. This is because libstdc++-v3 has a conformance testsuite,
20 # while most tests in other directories are just things that failed at some
21 # point in the past.
23 # Pass --this-year to the script if you want it to add the current year
24 # to all applicable notices. Pass --quilt if you are using quilt and
25 # want files to be added to the quilt before being changed.
27 # By default the script will update all directories for which the
28 # output has been vetted. You can instead pass the names of individual
29 # directories, including those that haven't been approved. So:
31 # update-copyright.py --this-year
33 # is the command that would be used at the beginning of a year to update
34 # all copyright notices (and possibly at other times to check whether
35 # new files have been added with old years). On the other hand:
37 # update-copyright.py --this-year libitm
39 # would run the script on just libitm/.
41 # Note that things like --version output strings must be updated before
42 # this script is run. There's already a separate procedure for that.
44 import os
45 import re
46 import sys
47 import time
48 import subprocess
class Errors:
    """Counts the problems reported during a run and echoes them to stderr."""

    def __init__ (self):
        # Number of calls made to report() so far.
        self.num_errors = 0

    def report (self, filename, string):
        """Write STRING to stderr and count it as one error.

        If FILENAME is non-empty, the message is prefixed with
        'FILENAME: '.
        """
        message = string
        if filename:
            message = filename + ': ' + message
        sys.stderr.write (message + '\n')
        self.num_errors += 1

    def ok (self):
        """Return True if nothing has been reported yet."""
        return not self.num_errors
63 class GenericFilter:
64 def __init__ (self):
65 self.skip_files = set()
66 self.skip_dirs = set()
67 self.skip_extensions = set()
68 self.fossilised_files = set()
69 self.own_files = set()
71 self.skip_files |= set ([
72 # Skip licence files.
73 'COPYING',
74 'COPYING.LIB',
75 'COPYING3',
76 'COPYING3.LIB',
77 'LICENSE',
78 'fdl.texi',
79 'gpl_v3.texi',
80 'fdl-1.3.xml',
81 'gpl-3.0.xml',
83 # Skip auto- and libtool-related files
84 'aclocal.m4',
85 'compile',
86 'config.guess',
87 'config.sub',
88 'depcomp',
89 'install-sh',
90 'libtool.m4',
91 'ltmain.sh',
92 'ltoptions.m4',
93 'ltsugar.m4',
94 'ltversion.m4',
95 'lt~obsolete.m4',
96 'missing',
97 'mkdep',
98 'mkinstalldirs',
99 'move-if-change',
100 'shlibpath.m4',
101 'symlink-tree',
102 'ylwrap',
104 # Skip FSF mission statement, etc.
105 'gnu.texi',
106 'funding.texi',
107 'appendix_free.xml',
109 # Skip imported texinfo files.
110 'texinfo.tex',
114 def get_line_filter (self, dir, filename):
115 if filename.startswith ('ChangeLog'):
116 # Ignore references to copyright in changelog entries.
117 return re.compile ('\t')
119 return None
121 def skip_file (self, dir, filename):
122 if filename in self.skip_files:
123 return True
125 (base, extension) = os.path.splitext (os.path.join (dir, filename))
126 if extension in self.skip_extensions:
127 return True
129 if extension == '.in':
130 # Skip .in files produced by automake.
131 if os.path.exists (base + '.am'):
132 return True
134 # Skip files produced by autogen
135 if (os.path.exists (base + '.def')
136 and os.path.exists (base + '.tpl')):
137 return True
139 # Skip configure files produced by autoconf
140 if filename == 'configure':
141 if os.path.exists (base + '.ac'):
142 return True
143 if os.path.exists (base + '.in'):
144 return True
146 return False
148 def skip_dir (self, dir, subdir):
149 return subdir in self.skip_dirs
151 def is_fossilised_file (self, dir, filename):
152 if filename in self.fossilised_files:
153 return True
154 # Only touch current current ChangeLogs.
155 if filename != 'ChangeLog' and filename.find ('ChangeLog') >= 0:
156 return True
157 return False
159 def by_package_author (self, dir, filename):
160 return filename in self.own_files
class Copyright:
    """Finds copyright notices in files and rewrites them canonically.

    ERRORS is the object used to report problems; it must provide
    report (filename, string) and ok().  Known copyright holders are
    registered with add_package_author and add_external_author.
    """

    def __init__ (self, errors):
        self.errors = errors

        # Characters in a range of years.  Include '.' for typos.
        ranges = r'[0-9](?:[-0-9.,\s]|\s+and\s+)*[0-9]'

        # Non-whitespace characters in a copyright holder's name.
        name = r'[\w.,-]'

        # Matches one year.
        self.year_re = re.compile ('[0-9]+')

        # Matches part of a year or copyright holder.
        self.continuation_re = re.compile (ranges + '|' + name)

        # Matches a full copyright notice:
        self.copyright_re = re.compile (
            # 1: 'Copyright (C)', etc.
            r'([Cc]opyright'
            r'|[Cc]opyright\s+\([Cc]\)'
            r'|[Cc]opyright\s+%s'
            r'|[Cc]opyright\s+©'
            r'|[Cc]opyright\s+@copyright{}'
            "|copyright = u'"
            r'|@set\s+copyright[\w-]+)'

            # 2: the years.  Include the whitespace in the year, so that
            # we can remove any excess.
            r'(\s*(?:' + ranges + r',?'
            r'|@value\{[^{}]*\})\s*)'

            # 3: 'by ', if used
            r'(by\s+)?'

            # 4: the copyright holder.  Don't allow multiple consecutive
            # spaces, so that right-margin gloss doesn't get caught
            # (e.g. gnat_ugn.texi).
            r'(' + name + r'(?:\s?' + name + r')*)?')

        # A regexp for notices that might have slipped by.  Just matching
        # 'copyright' is too noisy, and 'copyright.*[0-9]' falls foul of
        # HTML header markers, so check for 'copyright' and two digits.
        self.other_copyright_re = re.compile ('copyright.*[0-9][0-9]',
                                              re.IGNORECASE)
        self.comment_re = re.compile ('#+|[*]+|;+|%+|//+|@c |dnl ')

        # Maps each known holder spelling to its canonical form, or to
        # None for holders whose notices must be left untouched.
        self.holders = { '@copying': '@copying' }

        # Leading word sequences of package-author names; a holder equal
        # to one of these is probably continued on the next line.
        self.holder_prefixes = set()

        # True to 'quilt add' files before changing them.
        self.use_quilt = False

        # If set, force all notices to include this year.
        self.max_year = None

        # Goes after the year(s).  Could be ', '.
        self.separator = ' '

    def add_package_author (self, holder, canon_form = None):
        """Register HOLDER as a spelling of the package author.

        Notices naming HOLDER are rewritten to use CANON_FORM, which
        defaults to HOLDER itself.  Every space-terminated prefix of
        HOLDER is recorded so that names split over lines can be
        recognised as incomplete.
        """
        if not canon_form:
            canon_form = holder
        self.holders[holder] = canon_form
        index = holder.find (' ')
        while index >= 0:
            self.holder_prefixes.add (holder[:index])
            index = holder.find (' ', index + 1)

    def add_external_author (self, holder):
        """Register HOLDER as a known holder whose notices are not changed."""
        self.holders[holder] = None

    class BadYear (Exception):
        """Raised by parse_year for a year string it cannot interpret.

        Derives from Exception so that it can be raised and caught on
        Python 3 as well as Python 2.
        """

        def __init__ (self, year):
            Exception.__init__ (self, year)
            self.year = year

        def __str__ (self):
            return 'unrecognised year: ' + self.year

    def parse_year (self, string):
        """Convert a string of digits into a four-digit year.

        Two-digit years above 70 are taken to be 19xx.  Four-digit years
        are returned unchanged.  Anything else raises BadYear.
        """
        year = int (string)
        if len (string) == 2:
            if year > 70:
                return year + 1900
        elif len (string) == 4:
            return year
        raise self.BadYear (string)

    def year_range (self, years):
        """Return (earliest, latest) of the years mentioned in YEARS."""
        year_list = [self.parse_year (year)
                     for year in self.year_re.findall (years)]
        assert len (year_list) > 0
        return (min (year_list), max (year_list))

    def set_use_quilt (self, use_quilt):
        """Select whether files are 'quilt add'ed before being replaced."""
        self.use_quilt = use_quilt

    def include_year (self, year):
        """Force updated notices to extend to YEAR.  May be set only once."""
        assert not self.max_year
        self.max_year = year

    def canonicalise_years (self, dir, filename, filter, years):
        """Return the canonical 'XXXX' or 'XXXX-YYYY' form of YEARS.

        Texinfo @value references are returned unchanged.  FILTER is only
        consulted (via is_fossilised_file) when a forced year is set.
        Raises BadYear if a year cannot be parsed.
        """
        # Leave texinfo variables alone.
        if years.startswith ('@value'):
            return years

        (min_year, max_year) = self.year_range (years)

        # Update the upper bound, if enabled.
        if self.max_year and not filter.is_fossilised_file (dir, filename):
            max_year = max (max_year, self.max_year)

        # Use a range.
        if min_year == max_year:
            return '%d' % min_year
        else:
            return '%d-%d' % (min_year, max_year)

    def strip_continuation (self, line):
        """Return LINE without leading whitespace and comment marker."""
        line = line.lstrip()
        match = self.comment_re.match (line)
        if match:
            line = line[match.end():].lstrip()
        return line

    def is_complete (self, match):
        """Return a true value if MATCH appears to hold a whole holder name.

        A holder that is only a known prefix of a package-author name
        (and not itself a registered holder) is considered incomplete.
        """
        holder = match.group (4)
        return (holder
                and (holder not in self.holder_prefixes
                     or holder in self.holders))

    def update_copyright (self, dir, filename, filter, file, line, match):
        """Canonicalise the notice that MATCH found in LINE.

        FILE is the line iterator that LINE came from; further lines are
        pulled from it when the notice seems to continue.  Returns a
        tuple (changed, line, next_line): whether the text was altered,
        the text to emit (covering every line consumed), and a line that
        was read ahead but not consumed (or None).
        """
        orig_line = line
        next_line = None
        pathname = os.path.join (dir, filename)

        intro = match.group (1)
        if intro.startswith ('@set'):
            # Texinfo year variables should always be on one line
            after_years = line[match.end (2):].strip()
            if after_years != '':
                self.errors.report (pathname,
                                    'trailing characters in @set: '
                                    + after_years)
                return (False, orig_line, next_line)
        else:
            # If it looks like the copyright is incomplete, add the next line.
            while not self.is_complete (match):
                try:
                    # The next() builtin works on Python 2.6+ and Python 3,
                    # unlike the Python 2-only file.next() method.
                    next_line = next (file)
                except StopIteration:
                    break

                # If the next line doesn't look like a proper continuation,
                # assume that what we've got is complete.
                continuation = self.strip_continuation (next_line)
                if not self.continuation_re.match (continuation):
                    break

                # Merge the lines for matching purposes.
                orig_line += next_line
                line = line.rstrip() + ' ' + continuation
                next_line = None

                # Rematch with the longer line, at the original position.
                match = self.copyright_re.match (line, match.start())
                assert match

            holder = match.group (4)

            # Use the filter to test cases where markup is getting in the way.
            if filter.by_package_author (dir, filename):
                assert holder not in self.holders

            elif not holder:
                self.errors.report (pathname, 'missing copyright holder')
                return (False, orig_line, next_line)

            elif holder not in self.holders:
                self.errors.report (pathname,
                                    'unrecognised copyright holder: ' + holder)
                return (False, orig_line, next_line)

            else:
                # See whether the copyright is associated with the package
                # author.
                canon_form = self.holders[holder]
                if not canon_form:
                    return (False, orig_line, next_line)

                # Make sure the author is given in a consistent way.
                line = (line[:match.start (4)]
                        + canon_form
                        + line[match.end (4):])

                # Remove any 'by'.  When group 3 did not participate, both
                # offsets are -1 and the slices reassemble LINE unchanged.
                line = line[:match.start (3)] + line[match.end (3):]

        # Update the copyright years.
        years = match.group (2).strip()
        try:
            canon_form = self.canonicalise_years (dir, filename, filter, years)
        except self.BadYear as e:
            self.errors.report (pathname, str (e))
            return (False, orig_line, next_line)

        line = (line[:match.start (2)]
                + ('' if intro.startswith ('copyright = ') else ' ')
                + canon_form + self.separator
                + line[match.end (2):])

        # Use the standard (C) form.
        if intro.endswith ('right'):
            intro += ' (C)'
        elif intro.endswith ('(c)'):
            intro = intro[:-3] + '(C)'
        line = line[:match.start (1)] + intro + line[match.end (1):]

        # Strip trailing whitespace
        line = line.rstrip() + '\n'

        return (line != orig_line, line, next_line)

    def process_file (self, dir, filename, filter):
        """Update the copyright notices in FILENAME, a file in DIR.

        Files ending in '.tmp' are taken to be leftovers from an earlier
        run and are removed.  Otherwise the file is rewritten, via a
        '.tmp' sibling that is renamed over it, only if some notice
        changed and no error has been reported so far.
        """
        pathname = os.path.join (dir, filename)
        if filename.endswith ('.tmp'):
            # Looks like something we tried to create before.
            try:
                os.remove (pathname)
            except OSError:
                pass
            return

        lines = []
        changed = False
        line_filter = filter.get_line_filter (dir, filename)
        mode = None
        with open (pathname, 'r') as file:
            mode = os.fstat (file.fileno()).st_mode
            for line in file:
                # update_copyright may hand back a read-ahead line that
                # still needs processing, hence the inner loop.
                while line:
                    next_line = None
                    # Leave filtered-out lines alone.
                    if not (line_filter and line_filter.match (line)):
                        match = self.copyright_re.search (line)
                        if match:
                            res = self.update_copyright (dir, filename, filter,
                                                         file, line, match)
                            (this_changed, line, next_line) = res
                            changed = changed or this_changed

                        # Check for copyright lines that might have slipped by.
                        elif self.other_copyright_re.search (line):
                            self.errors.report (pathname,
                                                'unrecognised copyright: %s'
                                                % line.strip())
                    lines.append (line)
                    line = next_line

        # If something changed, write the new file out.
        if changed and self.errors.ok():
            tmp_pathname = pathname + '.tmp'
            with open (tmp_pathname, 'w') as file:
                for line in lines:
                    file.write (line)
                # Give the replacement the same mode as the original.
                os.fchmod (file.fileno(), mode)
            if self.use_quilt:
                subprocess.call (['quilt', 'add', pathname])
            os.rename (tmp_pathname, pathname)

    def process_tree (self, tree, filter):
        """Process every file under TREE that FILTER does not exclude."""
        for (dir, subdirs, filenames) in os.walk (tree):
            # Don't recurse through directories that should be skipped.
            # Assign through a slice so that os.walk sees the pruned list.
            subdirs[:] = [subdir for subdir in subdirs
                          if not filter.skip_dir (dir, subdir)]

            # Handle the files in this directory.
            for filename in filenames:
                if filter.skip_file (dir, filename):
                    sys.stdout.write ('Skipping %s\n'
                                      % os.path.join (dir, filename))
                else:
                    self.process_file (dir, filename, filter)
class CmdLine:
    """Command-line driver: parses sys.argv and runs the updater.

    COPYRIGHT is the class (or factory) called with the Errors object
    to create the object that does the actual processing.
    """

    def __init__ (self, copyright = Copyright):
        self.errors = Errors()
        self.copyright = copyright (self.errors)
        # (directory, filter) pairs registered with add_dir, in order.
        self.dirs = []
        # Directories processed when none are named on the command line.
        self.default_dirs = []
        # Directories named on the command line.
        self.chosen_dirs = []
        # Maps each option name to the callable that handles it.
        self.option_handlers = dict()
        # (name, help text) pairs, in registration order, for --help.
        self.option_help = []

        self.add_option ('--help', 'Print this help', self.o_help)
        self.add_option ('--quilt', '"quilt add" files before changing them',
                         self.o_quilt)
        self.add_option ('--this-year', 'Add the current year to every notice',
                         self.o_this_year)

    def add_option (self, name, help, handler):
        """Register option NAME with description HELP.

        HANDLER is called with the option string when NAME is seen.
        """
        self.option_help.append ((name, help))
        self.option_handlers[name] = handler

    def add_dir (self, dir, filter = None):
        """Register DIR as a directory that can be processed using FILTER.

        If FILTER is omitted, a fresh GenericFilter is used.  It is
        created per call rather than as a default argument value, which
        would be evaluated once and shared by every directory.
        """
        if filter is None:
            filter = GenericFilter()
        self.dirs.append ((dir, filter))

    def o_help (self, option = None):
        """Print the usage message and the known directories, then exit 0."""
        sys.stdout.write ('Usage: %s [options] dir1 dir2...\n\n'
                          'Options:\n' % sys.argv[0])
        option_fmt = '%-15s %s\n'
        for (what, help) in self.option_help:
            sys.stdout.write (option_fmt % (what, help))
        sys.stdout.write ('\nDirectories:\n')

        # List the directories three to a row; the last entry of each
        # row, and the final entry overall, ends the line.
        dir_fmt = '%-25s'
        for (i, (dir, filter)) in enumerate (self.dirs, 1):
            if i % 3 == 0 or i == len (self.dirs):
                sys.stdout.write (dir + '\n')
            else:
                sys.stdout.write (dir_fmt % dir)
        sys.exit (0)

    def o_quilt (self, option):
        """Handle --quilt."""
        self.copyright.set_use_quilt (True)

    def o_this_year (self, option):
        """Handle --this-year: extend notices to the current local year."""
        self.copyright.include_year (time.localtime().tm_year)

    def main (self):
        """Parse sys.argv, process the chosen directories and exit.

        The exit status is 0 if no error was reported and 1 otherwise.
        """
        for arg in sys.argv[1:]:
            if arg[:1] != '-':
                self.chosen_dirs.append (arg)
            elif arg in self.option_handlers:
                self.option_handlers[arg] (arg)
            else:
                self.errors.report (None, 'unrecognised option: ' + arg)
        if self.errors.ok():
            if not self.chosen_dirs:
                self.chosen_dirs = self.default_dirs
            if not self.chosen_dirs:
                self.o_help()
            else:
                for chosen_dir in self.chosen_dirs:
                    # Add a trailing separator so that only whole path
                    # components are compared.
                    canon_dir = os.path.join (chosen_dir, '')
                    count = 0
                    for (dir, filter) in self.dirs:
                        if (dir + os.sep).startswith (canon_dir):
                            count += 1
                            self.copyright.process_tree (dir, filter)
                    if count == 0:
                        self.errors.report (None, 'unrecognised directory: '
                                            + chosen_dir)
        sys.exit (0 if self.errors.ok() else 1)
519 #----------------------------------------------------------------------------
class TopLevelFilter (GenericFilter):
    """Filter for the top-level directory: its own files only."""

    def skip_dir (self, dir, subdir):
        """Skip every subdirectory, whatever its name."""
        return True
class ConfigFilter (GenericFilter):
    """Filter for the config directory."""

    def __init__ (self):
        GenericFilter.__init__ (self)

    def skip_file (self, dir, filename):
        """Skip .m4 files imported from gettext, then apply the generic rules.

        A .m4 file is taken to come from gettext when its first line
        contains 'gettext-'.
        """
        if filename.endswith ('.m4'):
            with open (os.path.join (dir, filename)) as file:
                first_line = file.readline()
            if 'gettext-' in first_line:
                return True
        return GenericFilter.skip_file (self, dir, filename)
class GCCFilter (GenericFilter):
    """Filter for the gcc directory."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        # Not part of GCC
        self.skip_files.add ('math-68881.h')

        self.skip_dirs.update ((
                # Better not create a merge nightmare for the GNAT folks.
                'ada',

                # Handled separately.
                'testsuite',
                ))

        self.skip_extensions.update ((
                # Maintained by the translation project.
                '.po',

                # Automatically-generated.
                '.pot',
                ))

        # Old news won't be updated.
        self.fossilised_files.add ('ONEWS')
class TestsuiteFilter (GenericFilter):
    """Filter for gcc/testsuite: the tests themselves are left alone."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        # Don't change the tests, which could be owned by anyone.
        self.skip_extensions.update ((
                '.c',
                '.C',
                '.cc',
                '.h',
                '.hs',
                '.f',
                '.f90',
                '.go',
                '.inc',
                '.java',
                ))

    def skip_file (self, dir, filename):
        """Skip two historical README files, then apply the generic rules."""
        # g++.niklas/README contains historical copyright information
        # and isn't updated.  Similarly params/README.
        if (filename == 'README'
            and os.path.basename (dir) in ('g++.niklas', 'params')):
            return True
        return GenericFilter.skip_file (self, dir, filename)
class LibCppFilter (GenericFilter):
    """Filter for the libcpp directory."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        self.skip_extensions.update ((
                # Maintained by the translation project.
                '.po',

                # Automatically-generated.
                '.pot',
                ))
class LibGCCFilter (GenericFilter):
    """Filter for the libgcc directory."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        # Imported from GLIBC.
        self.skip_dirs.add ('soft-fp')
class LibStdCxxFilter (GenericFilter):
    """Filter for the libstdc++-v3 directory."""

    def __init__ (self):
        GenericFilter.__init__ (self)

        self.skip_files |= set ([
                # Contains no copyright of its own, but quotes the GPL.
                'intro.xml',
                ])

        self.skip_dirs |= set ([
                # Contains automatically-generated sources.
                'html',

                # The testsuite data files shouldn't be changed.
                'data',

                # Contains imported images
                'images',
                ])

        self.own_files |= set ([
                # Contains markup around the copyright owner.
                'spine.xml',
                ])

    def get_line_filter (self, dir, filename):
        """Return a regexp matching lines to leave alone, or None.

        In boost_concept_check.h the '(C) Copyright Jeremy Siek' comment
        line is left alone; every other file gets the generic filter.
        """
        if filename == 'boost_concept_check.h':
            # A raw string keeps '\(' as a regexp escape instead of an
            # invalid string escape sequence.
            return re.compile (r'// \(C\) Copyright Jeremy Siek')
        return GenericFilter.get_line_filter (self, dir, filename)
class GCCCopyright (Copyright):
    """Copyright updater preloaded with the holders that occur in GCC."""

    def __init__ (self, errors):
        Copyright.__init__ (self, errors)

        # Every spelling below is rewritten to this canonical FSF form.
        canon_fsf = 'Free Software Foundation, Inc.'
        fsf_spellings = (
            'Free Software Foundation',
            'Free Software Foundation.',
            'Free Software Foundation Inc.',
            'Free Software Foundation, Inc',
            'Free Software Foundation, Inc.',
            'The Free Software Foundation',
            'The Free Software Foundation, Inc.',
            'Software Foundation, Inc.',
            )
        for spelling in fsf_spellings:
            self.add_package_author (spelling, canon_fsf)

        # Recognised holders whose notices are left exactly as they are.
        external_holders = (
            'ARM',
            'AdaCore',
            'Ami Tavory and Vladimir Dreizin, IBM-HRL.',
            'Cavium Networks.',
            'Faraday Technology Corp.',
            'Florida State University',
            'Greg Colvin and Beman Dawes.',
            'Hewlett-Packard Company',
            'Intel Corporation',
            'Information Technology Industry Council.',
            'James Theiler, Brian Gough',
            'Makoto Matsumoto and Takuji Nishimura,',
            'National Research Council of Canada.',
            'NVIDIA Corporation',
            'Peter Dimov and Multi Media Ltd.',
            'Peter Dimov',
            'Pipeline Associates, Inc.',
            'Regents of the University of California.',
            'Silicon Graphics Computer Systems, Inc.',
            'Silicon Graphics',
            'Stephen L. Moshier',
            'Sun Microsystems, Inc. All rights reserved.',
            # NOTE(review): this entry is registered twice; the two
            # spellings may once have differed in whitespace -- confirm
            # against the authoritative copy before deduplicating.
            'The Go Authors. All rights reserved.',
            'The Go Authors. All rights reserved.',
            'The Go Authors.',
            'The Regents of the University of California.',
            'Unicode, Inc.',
            'University of Toronto.',
            )
        for holder in external_holders:
            self.add_external_author (holder)
class GCCCmdLine (CmdLine):
    """Command line for GCC: registers its directories and their filters."""

    def __init__ (self):
        CmdLine.__init__ (self, GCCCopyright)

        # Directories that may be processed, in registration order.
        # A filter of None means that add_dir's default filter is used.
        known_dirs = [
            ('.', TopLevelFilter()),
            # boehm-gc is imported from upstream.
            ('config', ConfigFilter()),
            # contrib isn't really part of GCC.
            ('fixincludes', None),
            ('gcc', GCCFilter()),
            (os.path.join ('gcc', 'testsuite'), TestsuiteFilter()),
            ('gnattools', None),
            ('gotools', None),
            ('include', None),
            # intl is imported from upstream.
            ('libada', None),
            ('libatomic', None),
            ('libbacktrace', None),
            ('libcc1', None),
            # libcilkrts is imported from upstream.
            ('libcpp', LibCppFilter()),
            ('libdecnumber', None),
            # libffi is imported from upstream.
            ('libgcc', LibGCCFilter()),
            ('libgfortran', None),
            # libgo is imported from upstream.
            ('libgomp', None),
            ('libhsail-rt', None),
            ('libiberty', None),
            ('libitm', None),
            ('libobjc', None),
            # liboffloadmic is imported from upstream.
            ('libquadmath', None),
            # libsanitizer is imported from upstream.
            ('libssp', None),
            ('libstdc++-v3', LibStdCxxFilter()),
            ('libvtv', None),
            ('lto-plugin', None),
            # maintainer-scripts is not registered.
            # zlib is imported from upstream.
            ]
        for (name, dir_filter) in known_dirs:
            if dir_filter is None:
                self.add_dir (name)
            else:
                self.add_dir (name, dir_filter)

        # Directories processed when none are named on the command line.
        self.default_dirs = [
            'gcc',
            'include',
            'libada',
            'libatomic',
            'libbacktrace',
            'libcc1',
            'libcpp',
            'libdecnumber',
            'libgcc',
            'libgfortran',
            'libgomp',
            'libhsail-rt',
            'libiberty',
            'libitm',
            'libobjc',
            'libssp',
            'libstdc++-v3',
            'libvtv',
            'lto-plugin',
            ]
# Run the tool only when executed as a script, so that importing this
# module does not process sys.argv or call sys.exit.
if __name__ == '__main__':
    GCCCmdLine().main()