admin/copyright.py

   1 #!/usr/bin/python
   2 #
   3 # This file is part of the GROMACS molecular simulation package.
   4 #
   5 # Copyright (c) 2013, by the GROMACS development team, led by
   6 # Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
   7 # and including many others, as listed in the AUTHORS file in the
   8 # top-level source directory and at http://www.gromacs.org.
   9 #
  10 # GROMACS is free software; you can redistribute it and/or
  11 # modify it under the terms of the GNU Lesser General Public License
  12 # as published by the Free Software Foundation; either version 2.1
  13 # of the License, or (at your option) any later version.
  14 #
  15 # GROMACS is distributed in the hope that it will be useful,
  16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  18 # Lesser General Public License for more details.
  19 #
  20 # You should have received a copy of the GNU Lesser General Public
  21 # License along with GROMACS; if not, see
  22 # http://www.gnu.org/licenses, or write to the Free Software Foundation,
  23 # Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  24 #
  25 # If you want to redistribute modifications to GROMACS, please
  26 # consider that scientific software is very special. Version
  27 # control is crucial - bugs must be traceable. We will be happy to
  28 # consider code for inclusion in the official distribution, but
  29 # derived work must not be called official GROMACS. Details are found
  30 # in the README & COPYING files - if they are missing, get the
  31 # official version at http://www.gromacs.org.
  32 #
  33 # To help us fund GROMACS development, we humbly ask that you cite
  34 # the research papers on the package. Check out http://www.gromacs.org.
  35
  36 import datetime
  37 import os.path
  38 import re
  39 import sys
  40
  41 from optparse import OptionParser
  42
  43 class CopyrightState(object):
  44
  45     """Information about an existing (or non-existing) copyright header."""
  46
  47     def __init__(self, has_copyright, is_correct, is_newstyle, years, other_copyrights):
  48         self.has_copyright = has_copyright
  49         self.is_correct = is_correct
  50         self.is_newstyle = is_newstyle
  51         self.years = years
  52         self.other_copyrights = other_copyrights
  53
  54 class CopyrightChecker(object):
  55
  56     """Logic for analyzing existing copyright headers and generating new ones."""
  57
  58     _header = ["", "This file is part of the GROMACS molecular simulation package.", ""]
  59     _copyright = "Copyright (c) {0}, by the GROMACS development team, led by"
  60     _footer = """
  61 Mark Abraham, David van der Spoel, Berk Hess, and Erik Lindahl,
  62 and including many others, as listed in the AUTHORS file in the
  63 top-level source directory and at http://www.gromacs.org.
  64
  65 GROMACS is free software; you can redistribute it and/or
  66 modify it under the terms of the GNU Lesser General Public License
  67 as published by the Free Software Foundation; either version 2.1
  68 of the License, or (at your option) any later version.
  69
  70 GROMACS is distributed in the hope that it will be useful,
  71 but WITHOUT ANY WARRANTY; without even the implied warranty of
  72 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  73 Lesser General Public License for more details.
  74
  75 You should have received a copy of the GNU Lesser General Public
  76 License along with GROMACS; if not, see
  77 http://www.gnu.org/licenses, or write to the Free Software Foundation,
  78 Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA.
  79
  80 If you want to redistribute modifications to GROMACS, please
  81 consider that scientific software is very special. Version
  82 control is crucial - bugs must be traceable. We will be happy to
  83 consider code for inclusion in the official distribution, but
  84 derived work must not be called official GROMACS. Details are found
  85 in the README & COPYING files - if they are missing, get the
  86 official version at http://www.gromacs.org.
  87
  88 To help us fund GROMACS development, we humbly ask that you cite
  89 the research papers on the package. Check out http://www.gromacs.org.
  90 """.strip().splitlines()
  91
  92     def check_copyright(self, comment_block):
  93         """Analyze existing copyright header for correctness and extract information."""
  94         copyright_re = r'Copyright \(c\) (([0-9]{4}[,-])*[0-9]{4}),? by the GROMACS development team,'
  95         has_copyright = False
  96         is_newstyle = True
  97         is_correct = True
  98         next_header_line = 0
  99         next_footer_line = 0
 100         append_next_line_to_other_copyrights = False
 101         existing_years = ''
 102         other_copyrights = []
 103         for line in comment_block:
 104             if append_next_line_to_other_copyrights:
 105                 other_copyrights[-1] += ' ' + line
 106                 append_next_line_to_other_copyrights = False
 107                 continue
 108             if 'Copyright' in line:
 109                 has_copyright = True
 110                 match = re.match(copyright_re, line)
 111                 if match:
 112                     existing_years = match.group(1)
 113                     new_line = self._copyright.format(existing_years)
 114                     if line != new_line:
 115                         is_correct = False
 116                 else:
 117                     other_copyrights.append(line[line.find('Copyright'):])
 118                     if not line.startswith('Copyright'):
 119                         append_next_line_to_other_copyrights = True
 120                 if next_header_line != -1 or next_footer_line != 0:
 121                     is_correct = False
 122                 continue
 123             if line.startswith('Written by the Gromacs development team'):
 124                 has_copyright = True
 125             if next_header_line >= 0:
 126                 if line == self._header[next_header_line]:
 127                     next_header_line += 1
 128                     if next_header_line >= len(self._header):
 129                         next_header_line = -1
 130                 else:
 131                     is_correct = False
 132                     is_newstyle = False
 133             elif next_footer_line >= 0:
 134                 if line == self._footer[next_footer_line]:
 135                     next_footer_line += 1
 136                     if next_footer_line >= len(self._footer):
 137                         next_footer_line = -1
 138                 else:
 139                     is_correct = False
 140             else:
 141                 is_correct = False
 142         if next_header_line != -1 or next_footer_line != -1:
 143             is_correct = False
 144
 145         return CopyrightState(has_copyright, is_correct, is_newstyle, existing_years, other_copyrights)
 146
 147     def process_copyright(self, state, options, current_years, reporter):
 148         """Determine whether a copyrigth header needs to be updated and report issues."""
 149         need_update = False
 150
 151         if state.years:
 152             if options.replace_years:
 153                 if state.years != current_years:
 154                     need_update = True
 155                     reporter.report('copyright years replaced')
 156                 new_years = current_years
 157             else:
 158                 new_years = state.years
 159                 if not new_years.endswith(current_years):
 160                     if options.update_year:
 161                         need_update = True
 162                         new_years += ',' + current_years
 163                     if options.check or not need_update:
 164                         reporter.report('copyright year outdated')
 165                     else:
 166                         reporter.report('copyright year added')
 167         else:
 168             new_years = current_years
 169
 170         if not state.has_copyright:
 171             if options.add_missing:
 172                 need_update = True
 173             if options.check or not need_update:
 174                 reporter.report('copyright header missing')
 175             elif options.add_missing:
 176                 reporter.report('copyright header added')
 177         else:
 178             if not state.is_newstyle:
 179                 if options.replace_header:
 180                     need_update = True
 181                 if options.check or not need_update:
 182                     reporter.report('copyright header incorrect')
 183                 else:
 184                     reporter.report('copyright header replaced')
 185             elif not state.is_correct:
 186                 if options.update_header:
 187                     need_update = True
 188                 if options.check or not need_update:
 189                     reporter.report('copyright header outdated')
 190                 else:
 191                     reporter.report('copyright header updated')
 192
 193         return need_update, new_years
 194
 195     def get_copyright_text(self, years, other_copyrights):
 196         """Construct a new copyright header."""
 197         output = []
 198         output.extend(self._header)
 199         if other_copyrights:
 200             for line in other_copyrights:
 201                 outline = line.rstrip()
 202                 if outline.endswith(','):
 203                     outline = outline[:-1]
 204                 if not outline.endswith('.'):
 205                     outline += '.'
 206                 output.append(outline)
 207         output.append(self._copyright.format(years))
 208         output.extend(self._footer)
 209         return output
 210
 211 class Reporter(object):
 212
 213     """Wrapper for reporting issues in a file."""
 214
 215     def __init__(self, reportfile, filename):
 216         self._reportfile = reportfile
 217         self._filename = filename
 218
 219     def report(self, text):
 220         self._reportfile.write(self._filename + ': ' + text + '\n');
 221
 222 class CommentHandlerC(object):
 223
 224     """Handler for extracting and creating C-style comments."""
 225
 226     def extract_first_comment_block(self, content_lines):
 227         if not content_lines or not content_lines[0].startswith('/*'):
 228             return ([], 0)
 229         comment_block = [content_lines[0][2:].strip()]
 230         line_index = 1
 231         while line_index < len(content_lines):
 232             line = content_lines[line_index]
 233             if '*/' in content_lines[line_index]:
 234                 break
 235             comment_block.append(line.lstrip('* ').rstrip())
 236             line_index += 1
 237         return (comment_block, line_index + 1)
 238
 239     def create_comment_block(self, lines):
 240         output = []
 241         output.append(('/* ' + lines[0]).rstrip())
 242         output.extend([(' * ' + x).rstrip() for x in lines[1:]])
 243         output.append(' */')
 244         return output
 245
 246 class CommentHandlerSimple(object):
 247
 248     """Handler for extracting and creating sh-style comments.
 249
 250     Also other comments of the same type, but with a different comment
 251     character are supported."""
 252
 253     def __init__(self, comment_char):
 254         self._comment_char = comment_char
 255
 256     def extract_first_comment_block(self, content_lines):
 257         if not content_lines or not content_lines[0].startswith(self._comment_char):
 258             return ([], 0)
 259         comment_block = []
 260         line_index = 0
 261         while line_index < len(content_lines):
 262             line = content_lines[line_index]
 263             if not line.startswith(self._comment_char):
 264                 break
 265             comment_block.append(line.lstrip(self._comment_char + ' ').rstrip())
 266             line_index += 1
 267             if line == self._comment_char + ' the research papers on the package. Check out http://www.gromacs.org.':
 268                 break
 269         while line_index < len(content_lines):
 270             line = content_lines[line_index].rstrip()
 271             if len(line) > 0 and line != self._comment_char:
 272                 break
 273             line_index += 1
 274         return (comment_block, line_index)
 275
 276     def create_comment_block(self, lines):
 277         output = []
 278         output.extend([(self._comment_char + ' ' + x).rstrip() for x in lines])
 279         output.append('')
 280         return output
 281
 282 comment_handlers = {
 283         'c': CommentHandlerC(),
 284         'tex': CommentHandlerSimple('%'),
 285         'sh': CommentHandlerSimple('#')
 286         }
 287
 288 def select_comment_handler(override, filename):
 289     """Select comment handler for a file based on file name and input options."""
 290     filetype = override
 291     if not filetype and filename != '-':
 292         basename = os.path.basename(filename)
 293         root, ext = os.path.splitext(basename)
 294         if ext == '.cmakein':
 295             dummy, ext2 = os.path.splitext(root)
 296             if ext2:
 297                 ext = ext2
 298         if ext in ('.c', '.cu', '.cpp', '.h', '.cuh', '.y', '.l', '.pre', '.bm'):
 299             filetype = 'c'
 300         elif ext in ('.tex',):
 301             filetype = 'tex'
 302         elif basename in ('CMakeLists.txt', 'GMXRC', 'git-pre-commit') or \
 303                 ext in ('.cmake', '.cmakein', '.py', '.sh', '.bash', '.csh', '.zsh'):
 304             filetype = 'sh'
 305     if filetype in comment_handlers:
 306         return comment_handlers[filetype]
 307     if filetype:
 308         sys.stderr.write("Unsupported input format: {0}\n".format(filetype))
 309     elif filename != '-':
 310         sys.stderr.write("Unsupported input format: {0}\n".format(filename))
 311     else:
 312         sys.stderr.write("No file name or file type provided.\n")
 313     sys.exit(1)
 314
 315 def create_copyright_header(years, other_copyrights=None, language='c'):
 316     if language not in comment_handlers:
 317         sys.strerr.write("Unsupported language: {0}\n".format(language))
 318         sys.exit(1)
 319     copyright_checker = CopyrightChecker()
 320     comment_handler = comment_handlers[language]
 321     copyright_lines = copyright_checker.get_copyright_text(years, other_copyrights)
 322     comment_lines = comment_handler.create_comment_block(copyright_lines)
 323     return '\n'.join(comment_lines) + '\n'
 324
 325 def process_options():
 326     """Process input options."""
 327     parser = OptionParser()
 328     parser.add_option('-l', '--lang',
 329                       help='Comment type to use (c or sh)')
 330     parser.add_option('-y', '--years',
 331                       help='Comma-separated list of years')
 332     parser.add_option('-F', '--files',
 333                       help='File to read list of files from')
 334     parser.add_option('--check', action='store_true',
 335                       help='Do not modify the files, only check the copyright (default action). ' +
 336                            'If specified together with --update, do the modifications ' +
 337                            'but produce output as if only --check was provided.')
 338     parser.add_option('--update-year', action='store_true',
 339                       help='Update the copyright year if outdated')
 340     parser.add_option('--replace-years', action='store_true',
 341                       help='Replace the copyright years with those given with --years')
 342     parser.add_option('--update-header', action='store_true',
 343                       help='Update the copyright header if outdated')
 344     parser.add_option('--replace-header', action='store_true',
 345                       help='Replace any copyright header with the current one')
 346     parser.add_option('--remove-old-copyrights', action='store_true',
 347                       help='Remove copyright statements not in the new format')
 348     parser.add_option('--add-missing', action='store_true',
 349                       help='Add missing copyright headers')
 350     options, args = parser.parse_args()
 351
 352     filenames = args
 353     if options.files:
 354         with open(options.files, 'r') as filelist:
 355             filenames = [x.strip() for x in filelist.read().splitlines()]
 356     elif not filenames:
 357         filenames = ['-']
 358
 359     # Default is --check if nothing provided.
 360     if not options.check and not options.update_year and \
 361             not options.update_header and not options.replace_header and \
 362             not options.add_missing:
 363         options.check = True
 364
 365     return options, filenames
 366
 367 def main():
 368     """Do processing as a stand-alone script."""
 369     options, filenames = process_options()
 370     years = options.years
 371     if not years:
 372         years = str(datetime.date.today().year)
 373     if years.endswith(','):
 374         years = years[:-1]
 375
 376     checker = CopyrightChecker()
 377
 378     # Process each input file in turn.
 379     for filename in filenames:
 380         comment_handler = select_comment_handler(options.lang, filename)
 381
 382         # Read the input file.  We are doing an in-place operation, so can't
 383         # operate in pass-through mode.
 384         if filename == '-':
 385             contents = sys.stdin.read().splitlines()
 386             reporter = Reporter(sys.stderr, '<stdin>')
 387         else:
 388             with open(filename, 'r') as inputfile:
 389                 contents = inputfile.read().splitlines()
 390             reporter = Reporter(sys.stdout, filename)
 391
 392         output = []
 393         # Keep lines that must be at the beginning of the file and skip them in
 394         # the check.
 395         if contents and (contents[0].startswith('#!/') or \
 396                 contents[0].startswith('%code requires') or \
 397                 contents[0].startswith('/* #if')):
 398             output.append(contents[0])
 399             contents = contents[1:]
 400         # Remove and skip empty lines at the beginning.
 401         while contents and len(contents[0]) == 0:
 402             contents = contents[1:]
 403
 404         # Analyze the first comment block in the file.
 405         comment_block, line_count = comment_handler.extract_first_comment_block(contents)
 406         state = checker.check_copyright(comment_block)
 407         need_update, file_years = checker.process_copyright(state, options, years, reporter)
 408         if state.other_copyrights and options.remove_old_copyrights:
 409             need_update = True
 410             state.other_copyrights = []
 411             reporter.report('old copyrights removed')
 412
 413         if need_update:
 414             # Remove the original comment if it was a copyright comment.
 415             if state.has_copyright:
 416                 contents = contents[line_count:]
 417             new_block = checker.get_copyright_text(file_years, state.other_copyrights)
 418             output.extend(comment_handler.create_comment_block(new_block))
 419
 420         # Write the output file if required.
 421         if need_update or filename == '-':
 422             # Append the rest of the input file as it was.
 423             output.extend(contents)
 424             output = '\n'.join(output) + '\n'
 425             if filename == '-':
 426                 sys.stdout.write(output)
 427             else:
 428                 with open(filename, 'w') as outputfile:
 429                     outputfile.write(output)
 430
 431 if __name__ == "__main__":
 432     main()