PR other/59545
[official-gcc.git] / contrib / update-copyright.py
blob42d0bfbf2a3ec88c2439d085c7c34ac3f015087c
1 #!/usr/bin/python
3 # Copyright (C) 2013 Free Software Foundation, Inc.
5 # This script is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published by
7 # the Free Software Foundation; either version 3, or (at your option)
8 # any later version.
10 # This script adjusts the copyright notices at the top of source files
11 # so that they have the form:
13 # Copyright XXXX-YYYY Free Software Foundation, Inc.
15 # It doesn't change code that is known to be maintained elsewhere or
16 # that carries a non-FSF copyright.
18 # The script also doesn't change testsuite files, except those in
19 # libstdc++-v3. This is because libstdc++-v3 has a conformance testsuite,
20 # while most tests in other directories are just things that failed at some
21 # point in the past.
23 # Pass --this-year to the script if you want it to add the current year
24 # to all applicable notices. Pass --quilt if you are using quilt and
25 # want files to be added to the quilt before being changed.
27 # By default the script will update all directories for which the
28 # output has been vetted. You can instead pass the names of individual
29 # directories, including those that haven't been approved. So:
31 # update-copyright.py --this-year
33 # is the command that would be used at the beginning of a year to update
34 # all copyright notices (and possibly at other times to check whether
35 # new files have been added with old years). On the other hand:
37 # update-copyright.py --this-year libjava
39 # would run the script on just libjava/.
41 # Note that things like --version output strings must be updated before
42 # this script is run. There's already a separate procedure for that.
44 import os
45 import re
46 import sys
47 import time
48 import subprocess
class Errors:
    """Reports errors on stderr and keeps a count of them."""

    def __init__(self):
        # Number of errors reported so far.
        self.num_errors = 0

    def report(self, filename, string):
        """Write one error message to stderr and count it.

        When FILENAME is non-empty the message is prefixed with
        'FILENAME: '; otherwise STRING is written on its own.
        """
        message = ': '.join((filename, string)) if filename else string
        sys.stderr.write(message + '\n')
        self.num_errors += 1

    def ok(self):
        """Return True if no error has been reported yet."""
        return not self.num_errors
class GenericFilter:
    """Default policy deciding which files and directories are touched.

    Subclasses add entries to the sets created here, or override the
    query methods, to tailor the policy to one directory tree.
    """

    def __init__(self):
        # Base names of files that skip_file always rejects.
        self.skip_files = set()
        # Subdirectory names that skip_dir rejects.
        self.skip_dirs = set()
        # Filename extensions (with the leading dot) that skip_file rejects.
        self.skip_extensions = set()
        # Base names reported by is_fossilised_file.
        self.fossilised_files = set()
        # Base names reported by by_package_author.
        self.own_files = set()

        self.skip_files.update((
            # Skip licence files.
            'COPYING',
            'COPYING.LIB',
            'COPYING3',
            'COPYING3.LIB',
            'LICENSE',
            'fdl.texi',
            'gpl_v3.texi',
            'fdl-1.3.xml',
            'gpl-3.0.xml',

            # Skip auto- and libtool-related files
            'aclocal.m4',
            'compile',
            'config.guess',
            'config.sub',
            'depcomp',
            'install-sh',
            'libtool.m4',
            'ltmain.sh',
            'ltoptions.m4',
            'ltsugar.m4',
            'ltversion.m4',
            'lt~obsolete.m4',
            'missing',
            'mkdep',
            'mkinstalldirs',
            'move-if-change',
            'shlibpath.m4',
            'symlink-tree',
            'ylwrap',

            # Skip FSF mission statement, etc.
            'gnu.texi',
            'funding.texi',
            'appendix_free.xml',

            # Skip imported texinfo files.
            'texinfo.tex',
        ))

    def get_line_filter(self, dir, filename):
        """Return a regexp matching lines to leave alone, or None.

        Lines for which the returned pattern matches at the start are
        not examined for copyright notices.
        """
        if not filename.startswith('ChangeLog'):
            return None
        # Ignore references to copyright in changelog entries, which
        # begin with a tab.
        return re.compile('\t')

    def skip_file(self, dir, filename):
        """Return True if FILENAME in directory DIR must not be processed."""
        if filename in self.skip_files:
            return True

        base, extension = os.path.splitext(os.path.join(dir, filename))
        if extension in self.skip_extensions:
            return True

        # Skip .in files produced by automake.
        if extension == '.in' and os.path.exists(base + '.am'):
            return True

        # Skip files produced by autogen.
        if all(os.path.exists(base + suffix) for suffix in ('.def', '.tpl')):
            return True

        # Skip configure files produced by autoconf.
        if filename == 'configure':
            return any(os.path.exists(base + suffix)
                       for suffix in ('.ac', '.in'))

        return False

    def skip_dir(self, dir, subdir):
        """Return True if subdirectory SUBDIR of DIR must not be entered."""
        return subdir in self.skip_dirs

    def is_fossilised_file(self, dir, filename):
        """Return True if FILENAME is historical and should keep its years."""
        if filename in self.fossilised_files:
            return True
        # Only touch the current ChangeLog; any other name containing
        # 'ChangeLog' is treated as fossilised.
        return filename != 'ChangeLog' and 'ChangeLog' in filename

    def by_package_author(self, dir, filename):
        """Return True if FILENAME is listed in own_files."""
        return filename in self.own_files
class Copyright:
    """Finds copyright notices in files and rewrites them canonically.

    ERRORS is the object used to report problems; it must provide
    report (filename, string) and ok ().
    """

    def __init__(self, errors):
        self.errors = errors

        # Characters in a range of years.  Include '.' for typos.
        ranges = r'[0-9](?:[-0-9.,\s]|\s+and\s+)*[0-9]'

        # Non-whitespace characters in a copyright holder's name.
        name = r'[\w.,-]'

        # Matches one year.
        self.year_re = re.compile('[0-9]+')

        # Matches part of a year or copyright holder.
        self.continuation_re = re.compile(ranges + '|' + name)

        # Matches a full copyright notice.  Raw strings are used so that
        # the regexp escapes are not treated as (invalid) string escapes.
        self.copyright_re = re.compile(
            # 1: 'Copyright (C)', etc.
            # NOTE(review): the literal copyright sign below may have been
            # an HTML entity in the upstream file -- confirm.
            r'([Cc]opyright'
            r'|[Cc]opyright\s+\([Cc]\)'
            r'|[Cc]opyright\s+%s'
            r'|[Cc]opyright\s+©'
            r'|[Cc]opyright\s+@copyright{}'
            r'|@set\s+copyright[\w-]+)'

            # 2: the years.  Include the whitespace in the year, so that
            #    we can remove any excess.
            r'(\s*(?:' + ranges + r',?'
            r'|@value\{[^{}]*\})\s*)'

            # 3: 'by ', if used
            r'(by\s+)?'

            # 4: the copyright holder.  Don't allow multiple consecutive
            #    spaces, so that right-margin gloss doesn't get caught
            #    (e.g. gnat_ugn.texi).
            r'(' + name + r'(?:\s?' + name + r')*)?')

        # A regexp for notices that might have slipped by.  Just matching
        # 'copyright' is too noisy, and 'copyright.*[0-9]' falls foul of
        # HTML header markers, so check for 'copyright' and two digits.
        self.other_copyright_re = re.compile('copyright.*[0-9][0-9]',
                                             re.IGNORECASE)
        # Comment leaders that may precede a continuation line.
        self.comment_re = re.compile('#+|[*]+|;+|%+|//+|@c |dnl ')
        # Maps each recognised holder spelling to its canonical form, or
        # to None for holders whose notices must be left alone.
        self.holders = {'@copying': '@copying'}
        # Space-delimited proper prefixes of the package-author spellings.
        self.holder_prefixes = set()

        # True to 'quilt add' files before changing them.
        self.use_quilt = False

        # If set, force all notices to include this year.
        self.max_year = None

        # Goes after the year(s).  Could be ', '.
        self.separator = ' '

    def add_package_author(self, holder, canon_form=None):
        """Register HOLDER as a spelling of the package author.

        Notices naming HOLDER are rewritten to use CANON_FORM, which
        defaults to HOLDER itself.
        """
        if not canon_form:
            canon_form = holder
        self.holders[holder] = canon_form
        # Record every prefix that ends just before a space, so that a
        # holder split across lines can be recognised as incomplete.
        index = holder.find(' ')
        while index >= 0:
            self.holder_prefixes.add(holder[:index])
            index = holder.find(' ', index + 1)

    def add_external_author(self, holder):
        """Register HOLDER as a known holder whose notices are not changed."""
        self.holders[holder] = None

    class BadYear(Exception):
        """Raised by parse_year for a year string it cannot interpret.

        Deriving from Exception is required for the class to be raised
        and caught on Python 3.
        """

        def __init__(self, year):
            Exception.__init__(self, year)
            self.year = year

        def __str__(self):
            return 'unrecognised year: ' + self.year

    def parse_year(self, string):
        """Convert the digit string STRING to a four-digit year.

        Two-digit years above 70 are taken to be in the 1900s.  Raises
        BadYear for anything that is neither such a two-digit year nor
        a four-digit year.
        """
        year = int(string)
        if len(string) == 2:
            if year > 70:
                return year + 1900
        elif len(string) == 4:
            return year
        raise self.BadYear(string)

    def year_range(self, years):
        """Return (earliest, latest) of the years mentioned in YEARS."""
        year_list = [self.parse_year(year)
                     for year in self.year_re.findall(years)]
        assert len(year_list) > 0
        return (min(year_list), max(year_list))

    def set_use_quilt(self, use_quilt):
        """Set whether files are 'quilt add'ed before being changed."""
        self.use_quilt = use_quilt

    def include_year(self, year):
        """Force non-fossilised notices to extend to YEAR.  Call once only."""
        assert not self.max_year
        self.max_year = year

    def canonicalise_years(self, dir, filename, filter, years):
        """Return YEARS rewritten as 'YYYY' or 'YYYY-YYYY'.

        FILTER is consulted only when a maximum year has been set with
        include_year.  May raise BadYear.
        """
        # Leave texinfo variables alone.
        if years.startswith('@value'):
            return years

        (min_year, max_year) = self.year_range(years)

        # Update the upper bound, if enabled.
        if self.max_year and not filter.is_fossilised_file(dir, filename):
            max_year = max(max_year, self.max_year)

        # Use a range.
        if min_year == max_year:
            return '%d' % min_year
        else:
            return '%d-%d' % (min_year, max_year)

    def strip_continuation(self, line):
        """Return LINE without leading whitespace and comment leader."""
        line = line.lstrip()
        match = self.comment_re.match(line)
        if match:
            line = line[match.end():].lstrip()
        return line

    def is_complete(self, match):
        """Return a true value if MATCH seems to include the whole holder.

        The holder is considered incomplete when it is missing, or when
        it is only a prefix of a package-author spelling and not itself
        a recognised holder.
        """
        holder = match.group(4)
        return (holder
                and (holder not in self.holder_prefixes
                     or holder in self.holders))

    def update_copyright(self, dir, filename, filter, file, line, match):
        """Canonicalise the notice that MATCH found in LINE.

        FILE is the iterator LINE was read from; further lines are
        pulled from it while the notice looks incomplete.  Returns a
        tuple (changed, line, next_line): CHANGED says whether the text
        was altered, LINE is the text to emit (covering every consumed
        line), and NEXT_LINE is a line that was read ahead but is not
        part of the notice, or None.
        """
        orig_line = line
        next_line = None
        pathname = os.path.join(dir, filename)

        intro = match.group(1)
        if intro.startswith('@set'):
            # Texinfo year variables should always be on one line
            after_years = line[match.end(2):].strip()
            if after_years != '':
                self.errors.report(pathname,
                                   'trailing characters in @set: '
                                   + after_years)
                return (False, orig_line, next_line)
        else:
            # If it looks like the copyright is incomplete, add the next line.
            while not self.is_complete(match):
                try:
                    # next () works on both Python 2 and Python 3 iterators.
                    next_line = next(file)
                except StopIteration:
                    break

                # If the next line doesn't look like a proper continuation,
                # assume that what we've got is complete.
                continuation = self.strip_continuation(next_line)
                if not self.continuation_re.match(continuation):
                    break

                # Merge the lines for matching purposes.
                orig_line += next_line
                line = line.rstrip() + ' ' + continuation
                next_line = None

                # Rematch with the longer line, at the original position.
                match = self.copyright_re.match(line, match.start())
                assert match

            holder = match.group(4)

            # Use the filter to test cases where markup is getting in the way.
            if filter.by_package_author(dir, filename):
                assert holder not in self.holders

            elif not holder:
                self.errors.report(pathname, 'missing copyright holder')
                return (False, orig_line, next_line)

            elif holder not in self.holders:
                self.errors.report(pathname,
                                   'unrecognised copyright holder: ' + holder)
                return (False, orig_line, next_line)

            else:
                # See whether the copyright is associated with the package
                # author.
                canon_form = self.holders[holder]
                if not canon_form:
                    return (False, orig_line, next_line)

                # The edits below run from right to left so that the
                # offsets of the earlier groups in MATCH stay valid.

                # Make sure the author is given in a consistent way.
                line = (line[:match.start(4)]
                        + canon_form
                        + line[match.end(4):])

                # Remove any 'by'
                if match.group(3):
                    line = line[:match.start(3)] + line[match.end(3):]

        # Update the copyright years.
        years = match.group(2).strip()
        try:
            canon_form = self.canonicalise_years(dir, filename, filter, years)
        except self.BadYear as e:
            self.errors.report(pathname, str(e))
            return (False, orig_line, next_line)

        line = (line[:match.start(2)]
                + ' ' + canon_form + self.separator
                + line[match.end(2):])

        # Use the standard (C) form.
        if intro.endswith('right'):
            intro += ' (C)'
        elif intro.endswith('(c)'):
            intro = intro[:-3] + '(C)'
        line = line[:match.start(1)] + intro + line[match.end(1):]

        # Strip trailing whitespace
        line = line.rstrip() + '\n'

        return (line != orig_line, line, next_line)

    def process_file(self, dir, filename, filter):
        """Update the copyright notices in FILENAME, in directory DIR.

        Files ending in '.tmp' are assumed to be leftovers from an
        earlier run and are deleted instead.  The file is rewritten
        only if a notice changed and no error has been reported so far.
        """
        pathname = os.path.join(dir, filename)
        if filename.endswith('.tmp'):
            # Looks like something we tried to create before.  Removal
            # is best-effort, so a failure is deliberately ignored.
            try:
                os.remove(pathname)
            except OSError:
                pass
            return

        lines = []
        changed = False
        line_filter = filter.get_line_filter(dir, filename)
        with open(pathname, 'r') as file:
            for line in file:
                # update_copyright may read ahead and hand back a line
                # that still needs processing, hence the inner loop.
                while line:
                    next_line = None
                    # Leave filtered-out lines alone.
                    if not (line_filter and line_filter.match(line)):
                        match = self.copyright_re.search(line)
                        if match:
                            res = self.update_copyright(dir, filename, filter,
                                                        file, line, match)
                            (this_changed, line, next_line) = res
                            changed = changed or this_changed

                        # Check for copyright lines that might have slipped by.
                        elif self.other_copyright_re.search(line):
                            self.errors.report(pathname,
                                               'unrecognised copyright: %s'
                                               % line.strip())
                    lines.append(line)
                    line = next_line

        # If something changed, write the new file out.
        if changed and self.errors.ok():
            tmp_pathname = pathname + '.tmp'
            with open(tmp_pathname, 'w') as file:
                file.writelines(lines)
            if self.use_quilt:
                subprocess.call(['quilt', 'add', pathname])
            os.rename(tmp_pathname, pathname)

    def process_tree(self, tree, filter):
        """Process every file under TREE that FILTER does not skip."""
        for (dir, subdirs, filenames) in os.walk(tree):
            # Don't recurse through directories that should be skipped.
            # The list must be modified in place for os.walk to notice.
            subdirs[:] = [subdir for subdir in subdirs
                          if not filter.skip_dir(dir, subdir)]

            # Handle the files in this directory.
            for filename in filenames:
                if filter.skip_file(dir, filename):
                    sys.stdout.write('Skipping %s\n'
                                     % os.path.join(dir, filename))
                else:
                    self.process_file(dir, filename, filter)
class CmdLine:
    """Command-line driver: parses options and processes the chosen trees.

    COPYRIGHT is the class (or other callable) used to build the
    copyright processor; it is called with the Errors instance.
    """

    def __init__(self, copyright=Copyright):
        self.errors = Errors()
        self.copyright = copyright(self.errors)
        # (directory, filter) pairs registered with add_dir, in order.
        self.dirs = []
        # Directories processed when none is named on the command line.
        self.default_dirs = []
        # Directories named on the command line.
        self.chosen_dirs = []
        # Maps an option name to the function that handles it.
        self.option_handlers = dict()
        # (option, description) pairs, in the order shown by --help.
        self.option_help = []

        self.add_option('--help', 'Print this help', self.o_help)
        self.add_option('--quilt', '"quilt add" files before changing them',
                        self.o_quilt)
        self.add_option('--this-year', 'Add the current year to every notice',
                        self.o_this_year)

    def add_option(self, name, help, handler):
        """Register option NAME with description HELP and callback HANDLER.

        HANDLER is called with the option string as its only argument.
        """
        self.option_help.append((name, help))
        self.option_handlers[name] = handler

    def add_dir(self, dir, filter=None):
        """Register directory DIR, to be processed using FILTER.

        FILTER defaults to a fresh GenericFilter.  It is created per
        call rather than as a default argument value, so that
        directories do not silently share one filter object.
        """
        if filter is None:
            filter = GenericFilter()
        self.dirs.append((dir, filter))

    def o_help(self, option=None):
        """Print the usage message and exit with status 0."""
        sys.stdout.write('Usage: %s [options] dir1 dir2...\n\n'
                         'Options:\n' % sys.argv[0])
        option_format = '%-15s %s\n'
        for (what, help) in self.option_help:
            sys.stdout.write(option_format % (what, help))
        sys.stdout.write('\nDirectories:\n')

        # List the directories three to a row.
        dir_format = '%-25s'
        total = len(self.dirs)
        for index, (dir, _) in enumerate(self.dirs, 1):
            if index % 3 == 0 or index == total:
                sys.stdout.write(dir + '\n')
            else:
                sys.stdout.write(dir_format % dir)
        sys.exit(0)

    def o_quilt(self, option):
        """Handle --quilt."""
        self.copyright.set_use_quilt(True)

    def o_this_year(self, option):
        """Handle --this-year."""
        self.copyright.include_year(time.localtime().tm_year)

    def main(self):
        """Process sys.argv, update the chosen directories and exit.

        The exit status is 0 if no error was reported and 1 otherwise.
        """
        for arg in sys.argv[1:]:
            if arg[:1] != '-':
                self.chosen_dirs.append(arg)
            elif arg in self.option_handlers:
                self.option_handlers[arg](arg)
            else:
                self.errors.report(None, 'unrecognised option: ' + arg)
        if self.errors.ok():
            if not self.chosen_dirs:
                self.chosen_dirs = self.default_dirs
            if not self.chosen_dirs:
                self.o_help()
            else:
                for chosen_dir in self.chosen_dirs:
                    # Add a trailing separator so that only whole path
                    # components are compared.
                    canon_dir = os.path.join(chosen_dir, '')
                    count = 0
                    for (dir, filter) in self.dirs:
                        if (dir + os.sep).startswith(canon_dir):
                            count += 1
                            self.copyright.process_tree(dir, filter)
                    if count == 0:
                        self.errors.report(None, 'unrecognised directory: '
                                           + chosen_dir)
        sys.exit(0 if self.errors.ok() else 1)
514 #----------------------------------------------------------------------------
class TopLevelFilter(GenericFilter):
    """Filter for a tree whose subdirectories are all left alone."""

    def skip_dir(self, dir, subdir):
        # Every subdirectory is skipped, so only the files directly in
        # the tree root are considered.
        return True
class ConfigFilter(GenericFilter):
    """Filter for the config directory."""

    def __init__(self):
        GenericFilter.__init__(self)

    def skip_file(self, dir, filename):
        """Skip .m4 files whose first line mentions 'gettext-'.

        Everything else is decided by GenericFilter.skip_file.
        """
        if filename.endswith('.m4'):
            with open(os.path.join(dir, filename)) as m4_file:
                first_line = m4_file.readline()
            # Skip files imported from gettext.
            if 'gettext-' in first_line:
                return True
        return GenericFilter.skip_file(self, dir, filename)
class GCCFilter(GenericFilter):
    """Filter for the gcc directory."""

    def __init__(self):
        GenericFilter.__init__(self)

        self.skip_files.update([
            # Not part of GCC
            'math-68881.h',
        ])

        self.skip_dirs.update([
            # Better not create a merge nightmare for the GNAT folks.
            'ada',

            # Handled separately.
            'testsuite',
        ])

        self.skip_extensions.update([
            # Maintained by the translation project.
            '.po',

            # Automatically-generated.
            '.pot',
        ])

        self.fossilised_files.update([
            # Old news won't be updated.
            'ONEWS',
        ])
class TestsuiteFilter(GenericFilter):
    """Filter for testsuite directories."""

    def __init__(self):
        GenericFilter.__init__(self)

        self.skip_extensions.update([
            # Don't change the tests, which could be owned by anyone.
            '.c',
            '.C',
            '.cc',
            '.h',
            '.hs',
            '.f',
            '.f90',
            '.go',
            '.inc',
            '.java',
        ])

    def skip_file(self, dir, filename):
        """Skip g++.niklas/README as well as the generic exclusions."""
        # g++.niklas/README contains historical copyright information
        # and isn't updated.
        if os.path.basename(dir) == 'g++.niklas' and filename == 'README':
            return True
        return GenericFilter.skip_file(self, dir, filename)
class LibCppFilter(GenericFilter):
    """Filter for the libcpp directory."""

    def __init__(self):
        GenericFilter.__init__(self)

        self.skip_extensions.update([
            # Maintained by the translation project.
            '.po',

            # Automatically-generated.
            '.pot',
        ])
class LibGCCFilter(GenericFilter):
    """Filter for the libgcc directory."""

    def __init__(self):
        GenericFilter.__init__(self)

        self.skip_dirs.update([
            # Imported from GLIBC.
            'soft-fp',
        ])
class LibJavaFilter(GenericFilter):
    """Filter for the libjava directory."""

    def __init__(self):
        GenericFilter.__init__(self)

        self.skip_dirs.update([
            # Handled separately.
            'testsuite',

            # Not really part of the library
            'contrib',

            # Imported from upstream
            'classpath',
            'libltdl',
        ])

    def get_line_filter(self, dir, filename):
        """Ignore lines naming copyright-related identifiers in two headers.

        Other files use the GenericFilter line filter.
        """
        per_file_patterns = (
            ('NameDecoder.h', '.*NAME_COPYRIGHT'),
            ('ICC_Profile.h', '.*icSigCopyrightTag'),
        )
        for special_name, pattern in per_file_patterns:
            if filename == special_name:
                return re.compile(pattern)
        return GenericFilter.get_line_filter(self, dir, filename)
class LibMudflapFilter(GenericFilter):
    """Filter for the libmudflap directory."""

    def __init__(self):
        GenericFilter.__init__(self)

        self.skip_dirs.update([
            # Handled separately.
            'testsuite',
        ])
class LibStdCxxFilter(GenericFilter):
    """Filter for the libstdc++-v3 directory."""

    def __init__(self):
        GenericFilter.__init__(self)

        self.skip_files |= set([
            # Contains no copyright of its own, but quotes the GPL.
            'intro.xml',
        ])

        self.skip_dirs |= set([
            # Contains automatically-generated sources.
            'html',

            # The testsuite data files shouldn't be changed.
            'data',

            # Contains imported images
            'images',
        ])

        self.own_files |= set([
            # Contains markup around the copyright owner.
            'spine.xml',
        ])

    def get_line_filter(self, dir, filename):
        """Ignore the Jeremy Siek notice line in boost_concept_check.h.

        Other files use the GenericFilter line filter.
        """
        if filename == 'boost_concept_check.h':
            # A raw string keeps '\(' as a regexp escape instead of an
            # invalid string escape; the compiled pattern is unchanged.
            return re.compile(r'// \(C\) Copyright Jeremy Siek')
        return GenericFilter.get_line_filter(self, dir, filename)
class GCCCopyright(Copyright):
    """Copyright processor preloaded with the holders known to GCC."""

    def __init__(self, errors):
        Copyright.__init__(self, errors)

        # Every spelling of the FSF is rewritten to this form.
        canon_fsf = 'Free Software Foundation, Inc.'
        fsf_spellings = (
            'Free Software Foundation',
            'Free Software Foundation.',
            'Free Software Foundation Inc.',
            'Free Software Foundation, Inc',
            'Free Software Foundation, Inc.',
            'The Free Software Foundation',
            'The Free Software Foundation, Inc.',
            'Software Foundation, Inc.',
        )
        for spelling in fsf_spellings:
            self.add_package_author(spelling, canon_fsf)

        # Holders that are recognised but whose notices are left alone.
        external_holders = (
            'ARM',
            'AdaCore',
            'Ami Tavory and Vladimir Dreizin, IBM-HRL.',
            'Cavium Networks.',
            'Faraday Technology Corp.',
            'Florida State University',
            'Greg Colvin and Beman Dawes.',
            'Hewlett-Packard Company',
            'Information Technology Industry Council.',
            'James Theiler, Brian Gough',
            'Makoto Matsumoto and Takuji Nishimura,',
            'National Research Council of Canada.',
            'Peter Dimov and Multi Media Ltd.',
            'Peter Dimov',
            'Pipeline Associates, Inc.',
            'Regents of the University of California.',
            'Silicon Graphics Computer Systems, Inc.',
            'Silicon Graphics',
            'Stephen L. Moshier',
            'Sun Microsystems, Inc. All rights reserved.',
            # NOTE(review): the next two entries are identical as seen
            # here; they may have differed only in whitespace -- confirm.
            'The Go Authors. All rights reserved.',
            'The Go Authors. All rights reserved.',
            'The Go Authors.',
            'The Regents of the University of California.',
            'Unicode, Inc.',
            'University of Toronto.',
        )
        for holder in external_holders:
            self.add_external_author(holder)
class GCCCmdLine(CmdLine):
    """Command line preconfigured with GCC's directories and filters."""

    def __init__(self):
        CmdLine.__init__(self, GCCCopyright)

        # Arguments for add_dir, in registration order.  One-element
        # entries use add_dir's default filter.
        known_dirs = [
            ('.', TopLevelFilter()),
            # boehm-gc is imported from upstream.
            ('config', ConfigFilter()),
            # contrib isn't really part of GCC.
            ('fixincludes',),
            ('gcc', GCCFilter()),
            (os.path.join('gcc', 'testsuite'), TestsuiteFilter()),
            ('gnattools',),
            ('include',),
            ('libada',),
            ('libatomic',),
            ('libbacktrace',),
            ('libcpp', LibCppFilter()),
            ('libdecnumber',),
            # libffi is imported from upstream.
            ('libgcc', LibGCCFilter()),
            ('libgfortran',),
            ('libgomp',),
            ('libiberty',),
            ('libitm',),
            ('libjava', LibJavaFilter()),
            (os.path.join('libjava', 'testsuite'), TestsuiteFilter()),
            ('libmudflap', LibMudflapFilter()),
            (os.path.join('libmudflap', 'testsuite'), TestsuiteFilter()),
            ('libobjc',),
            ('libquadmath',),
            # libsanitiser is imported from upstream.
            ('libssp',),
            ('libstdc++-v3', LibStdCxxFilter()),
            ('lto-plugin',),
            # zlib is imported from upstream.
        ]
        for add_dir_args in known_dirs:
            self.add_dir(*add_dir_args)

        # Directories updated when none is named on the command line.
        self.default_dirs = [
            'gcc',
            'libada',
            'libatomic',
            'libbacktrace',
            'libcpp',
            'libdecnumber',
            'libgcc',
            'libgfortran',
            'libgomp',
            'libitm',
            'libmudflap',
            'libobjc',
            'libstdc++-v3',
        ]
# Script entry point: build the GCC-specific command line and run it.
# main () finishes by calling sys.exit with the overall status.
GCCCmdLine().main()