contrib/gcc-changelog/git_commit.py

   1 #!/usr/bin/env python3
   2
   3 # Copyright (C) 2020-2023 Free Software Foundation, Inc.
   4 #
   5 # This file is part of GCC.
   6 #
   7 # GCC is free software; you can redistribute it and/or modify it under
   8 # the terms of the GNU General Public License as published by the Free
   9 # Software Foundation; either version 3, or (at your option) any later
  10 # version.
  11 #
  12 # GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  13 # WARRANTY; without even the implied warranty of MERCHANTABILITY or
  14 # FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  15 # for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with GCC; see the file COPYING3.  If not see
  19 # <http://www.gnu.org/licenses/>.  */
  20
  21 import difflib
  22 import os
  23 import re
  24 import sys
  25 from collections import defaultdict
  26
# Directory paths that are recognized as ChangeLog locations by default.
# GitCommit.init_changelog_locations() copies this set for every commit and
# may drop entries depending on the ref name it is given.
default_changelog_locations = {
    'c++tools',
    'config',
    'contrib',
    'contrib/header-tools',
    'contrib/reghunt',
    'contrib/regression',
    'fixincludes',
    'gcc/ada',
    'gcc/analyzer',
    'gcc/brig',
    'gcc/c',
    'gcc/c-family',
    'gcc',
    'gcc/cp',
    'gcc/d',
    'gcc/fortran',
    'gcc/go',
    'gcc/jit',
    'gcc/lto',
    'gcc/m2',
    'gcc/objc',
    'gcc/objcp',
    'gcc/po',
    'gcc/rust',
    'gcc/testsuite',
    'gnattools',
    'gotools',
    'include',
    'intl',
    'libada',
    'libatomic',
    'libbacktrace',
    'libcc1',
    'libcody',
    'libcpp',
    'libcpp/po',
    'libdecnumber',
    'libffi',
    'libgcc',
    'libgcc/config/avr/libf7',
    'libgcc/config/libbid',
    'libgfortran',
    'libgm2',
    'libgomp',
    'libhsail-rt',
    'libiberty',
    'libitm',
    'libobjc',
    'libphobos',
    'libquadmath',
    'libsanitizer',
    'libssp',
    'libstdc++-v3',
    'libvtv',
    'lto-plugin',
    'maintainer-scripts',
    'zlib'}
  85
# Component names accepted in front of the slash of a "PR component/number"
# reference.  Both the subject line and the PR lines of the ChangeLog part
# are validated against this set.
bug_components = {
    'ada',
    'analyzer',
    'boehm-gc',
    'bootstrap',
    'c',
    'c++',
    'd',
    'debug',
    'demangler',
    'driver',
    'fastjar',
    'fortran',
    'gcov-profile',
    'go',
    'hsa',
    'inline-asm',
    'ipa',
    'java',
    'jit',
    'libbacktrace',
    'libf2c',
    'libffi',
    'libfortran',
    'libgcc',
    'libgcj',
    'libgomp',
    'libitm',
    'libobjc',
    'libquadmath',
    'libstdc++',
    'lto',
    'middle-end',
    'modula2',
    'objc',
    'objc++',
    'other',
    'pch',
    'pending',
    'plugins',
    'preprocessor',
    'regression',
    'rtl-optimization',
    'rust',
    'sanitizer',
    'spam',
    'target',
    'testsuite',
    'translation',
    'tree-optimization',
    'web'}
 137
# Path prefixes tested by GitCommit.in_ignored_location(): changed files
# below them do not have to be mentioned in a ChangeLog, and a commit that
# touches only such files may omit the ChangeLog part altogether.
ignored_prefixes = {
    'gcc/d/dmd/',
    'gcc/go/gofrontend/',
    'gcc/testsuite/gdc.test/',
    'gcc/testsuite/go.test/test/',
    'libffi/',
    'libgo/',
    'libphobos/libdruntime/',
    'libphobos/src/',
    'libsanitizer/',
    }

# Path prefixes consulted by GitCommit.check_file_patterns() when a
# ChangeLog item names files with a trailing '*' wildcard.
wildcard_prefixes = {
    'gcc/testsuite/',
    'libstdc++-v3/doc/html/',
    'libstdc++-v3/testsuite/'
    }

# Files that are treated like ChangeLog files by GitCommit.__init__: they
# must be updated in commits of their own, separately from normal changes.
misc_files = {
    'gcc/DATESTAMP',
    'gcc/BASE-VER',
    'gcc/DEV-PHASE'
    }
 161
# "YYYY-MM-DD  Name  <email>": a date, two spaces, then the author name
# followed by two spaces and the address in angle brackets.
author_line_regex = \
        re.compile(r'^(?P<datetime>\d{4}-\d{2}-\d{2})\ {2}(?P<name>.*  <.*>)')
# A tab, optional spaces and "Name  <email>" (an author line without date).
additional_author_regex = re.compile(r'^\t(?P<spaces>\ *)?(?P<name>.*  <.*>)')
# Optional "for "/"For ", a location prefix, "ChangeLog" and an optional colon.
# NOTE(review): inside the character class "+-/" is a range ('+' to '/'), so
# ',' and '.' are accepted as well - confirm whether that is intended.
changelog_regex = re.compile(r'^(?:[fF]or +)?([a-z0-9+-/]*)ChangeLog:?')
# "PR component/NNNN" (4 to 7 digits), not preceded by a word character.
subject_pr_regex = re.compile(r'(^|\W)PR\s+(?P<component>[a-zA-Z0-9+-]+)/(?P<pr>\d{4,7})')
# "PR", optional whitespace and 4 to 7 digits enclosed in () or [].
subject_pr2_regex = re.compile(r'[(\[]PR\s*(?P<pr>\d{4,7})[)\]]')
# A tab, "PR ", an optional "component/" and a number ending the line.
pr_regex = re.compile(r'\tPR (?P<component>[a-z0-9+-]+\/)?(?P<pr>[0-9]+)$')
# A tab, "DR " and a number ending the line.
dr_regex = re.compile(r'\tDR ([0-9]+)$')
# A tab and an asterisk; captures the spaces after it and the remaining text.
star_prefix_regex = re.compile(r'\t\*(?P<spaces>\ *)(?P<content>.*)')
# Any character that terminates the list of file names of a ChangeLog item.
end_of_location_regex = re.compile(r'[\[<(:]')
# An item "(name):" (optionally preceded by "* file ") with nothing but
# whitespace after the colon.
item_empty_regex = re.compile(r'\t(\* \S+ )?\(\S+\):\s*$')
# A line that starts a new item: tab followed by '*' or by "(name):".
item_parenthesis_regex = re.compile(r'\t(\*|\(\S+\):)')
# The line git adds to the message of a revert commit.
revert_regex = re.compile(r'This reverts commit (?P<hash>[0-9a-f]+)\.$')
# The note added to the message of a cherry-picked commit.
cherry_pick_regex = re.compile(r'cherry picked from commit (?P<hash>\w+)')

# Maximum length of a ChangeLog line; tabs count as TAB_WIDTH columns.
LINE_LIMIT = 100
TAB_WIDTH = 8
# Compared against the lower-cased line, hence written in lower case.
CO_AUTHORED_BY_PREFIX = 'co-authored-by: '

# Lines starting (case-insensitively) with one of these trailers are
# skipped while the ChangeLog part is parsed.
REVIEW_PREFIXES = ('reviewed-by: ', 'reviewed-on: ', 'signed-off-by: ',
                   'acked-by: ', 'tested-by: ', 'reported-by: ',
                   'suggested-by: ')
DATE_FORMAT = '%Y-%m-%d'
 185
 186
 187 def decode_path(path):
 188     # When core.quotepath is true (default value), utf8 chars are encoded like:
 189     # "b/ko\304\215ka.txt"
 190     #
 191     # The upstream bug is fixed:
 192     # https://github.com/gitpython-developers/GitPython/issues/1099
 193     #
 194     # but we still need a workaround for older versions of the library.
 195     # Please take a look at the explanation of the transformation:
 196     # https://stackoverflow.com/questions/990169/how-do-convert-unicode-escape-sequences-to-unicode-characters-in-a-python-string
 197
 198     if path.startswith('"') and path.endswith('"'):
 199         return (path.strip('"').encode('utf8').decode('unicode-escape')
 200                 .encode('latin-1').decode('utf8'))
 201     else:
 202         return path
 203
 204
 205 class Error:
 206     def __init__(self, message, line=None, details=None):
 207         self.message = message
 208         self.line = line
 209         self.details = details
 210
 211     def __repr__(self):
 212         s = self.message
 213         if self.line:
 214             s += ': "%s"' % self.line
 215         return s
 216
 217
 218 class ChangeLogEntry:
 219     def __init__(self, folder, authors, prs):
 220         self.folder = folder
 221         # The 'list.copy()' function is not available before Python 3.3
 222         self.author_lines = list(authors)
 223         self.initial_prs = list(prs)
 224         self.prs = list(prs)
 225         self.lines = []
 226         self.files = []
 227         self.file_patterns = []
 228         self.parentheses_stack = []
 229
 230     def parse_file_names(self):
 231         # Whether the content currently processed is between a star prefix the
 232         # end of the file list: a colon or an open paren.
 233         in_location = False
 234
 235         for line in self.lines:
 236             # If this line matches the star prefix, start the location
 237             # processing on the information that follows the star.
 238             # Note that we need to skip macro names that can be in form of:
 239             #
 240             # * config/i386/i386.md (*fix_trunc<mode>_i387_1,
 241             # *add<mode>3_ne, *add<mode>3_eq_0, *add<mode>3_ne_0,
 242             # *fist<mode>2_<rounding>_1, *<code><mode>3_1):
 243             #
 244             m = star_prefix_regex.match(line)
 245             if m and len(m.group('spaces')) == 1:
 246                 in_location = True
 247                 line = m.group('content')
 248
 249             if in_location:
 250                 # Strip everything that is not a filename in "line":
 251                 # entities "(NAME)", cases "<PATTERN>", conditions
 252                 # "[COND]", entry text (the colon, if present, and
 253                 # anything that follows it).
 254                 m = end_of_location_regex.search(line)
 255                 if m:
 256                     line = line[:m.start()]
 257                     in_location = False
 258
 259                 # At this point, all that's left is a list of filenames
 260                 # separated by commas and whitespaces.
 261                 for file in line.split(','):
 262                     file = file.strip()
 263                     if file:
 264                         if file.endswith('*'):
 265                             self.file_patterns.append(file[:-1])
 266                         else:
 267                             self.files.append(file)
 268
 269     @property
 270     def datetime(self):
 271         for author in self.author_lines:
 272             if author[1]:
 273                 return author[1]
 274         return None
 275
 276     @property
 277     def authors(self):
 278         return [author_line[0] for author_line in self.author_lines]
 279
 280     @property
 281     def is_empty(self):
 282         return not self.lines and self.prs == self.initial_prs
 283
 284     def contains_author(self, author):
 285         for author_lines in self.author_lines:
 286             if author_lines[0] == author:
 287                 return True
 288         return False
 289
 290
 291 class GitInfo:
 292     def __init__(self, hexsha, date, author, lines, modified_files):
 293         self.hexsha = hexsha
 294         self.date = date
 295         self.author = author
 296         self.lines = lines
 297         self.modified_files = modified_files
 298
 299
 300 class GitCommit:
    def __init__(self, info, commit_to_info_hook=None, ref_name=None):
        """Verify the commit described by INFO.

        INFO provides 'lines' (the commit message) and 'modified_files'
        (items indexed as path and status letter).  COMMIT_TO_INFO_HOOK,
        when given, is called with the hash of a reverted commit and
        returns the info of that commit (or a false value if it cannot be
        found).  REF_NAME is passed on to init_changelog_locations().

        Problems are collected in self.errors and self.warnings; nothing
        is raised for an invalid commit message.
        """
        # original_info always refers to the commit passed in, while
        # self.info is replaced by the reverted commit for a revert.
        self.original_info = info
        self.info = info
        self.message = None
        self.changes = None
        self.changelog_entries = []
        self.errors = []
        self.warnings = []
        self.top_level_authors = []
        self.co_authors = []
        self.top_level_prs = []
        self.subject_prs = set()
        self.cherry_pick_commit = None
        self.revert_commit = None
        self.commit_to_info_hook = commit_to_info_hook
        self.init_changelog_locations(ref_name)

        # Skip Update copyright years commits
        if self.info.lines and self.info.lines[0] == 'Update copyright years.':
            return

        if self.info.lines and len(self.info.lines) > 1 and self.info.lines[1]:
            self.errors.append(Error('Expected empty second line in commit message', info.lines[0]))

        # Identify first if the commit is a Revert commit
        for line in self.info.lines:
            m = revert_regex.fullmatch(line)
            if m:
                self.revert_commit = m.group('hash')
                break
        if self.revert_commit:
            # The following happens for get_email.py:
            if not self.commit_to_info_hook:
                self.warnings.append(f"Invoked script can not obtain info about "
                                     f"reverted commits such as '{self.revert_commit}'")
                return
            # From here on the reverted commit is the one being verified.
            self.info = self.commit_to_info_hook(self.revert_commit)
            if not self.info:
                self.errors.append(Error('Cannot find to-be-reverted commit', self.revert_commit))
                return

        # check_commit_email() is defined outside the part of the class
        # visible here.
        self.check_commit_email()

        # Extract PR numbers from the subject line
        # Match either [PRnnnn] / (PRnnnn) or PR component/nnnn
        # (this branch is only taken for non-revert commits, where 'info'
        # and 'self.info' are the same object)
        if self.info.lines and not self.revert_commit:
            self.subject_prs = {m.group('pr') for m in subject_pr2_regex.finditer(info.lines[0])}
            for m in subject_pr_regex.finditer(info.lines[0]):
                if not m.group('component') in bug_components:
                    self.errors.append(Error('invalid PR component in subject', info.lines[0]))
                self.subject_prs.add(m.group('pr'))

        # Allow complete deletion of ChangeLog files in a commit
        project_files = [f for f in self.info.modified_files
                         if (self.is_changelog_filename(f[0], allow_suffix=True) and f[1] != 'D')
                         or f[0] in misc_files]
        ignored_files = [f for f in self.info.modified_files
                         if self.in_ignored_location(f[0])]
        if len(project_files) == len(self.info.modified_files):
            # All modified files are only MISC files
            # (this also holds when no file is modified at all)
            return
        elif project_files:
            err = 'ChangeLog, DATESTAMP, BASE-VER and DEV-PHASE updates ' \
                  'should be done separately from normal commits\n' \
                  '(note: ChangeLog entries will be automatically ' \
                  'added by a cron job)'
            self.errors.append(Error(err))
            return

        # project_files is empty at this point, so this is true exactly
        # when every modified file lies in an ignored location.
        all_are_ignored = (len(project_files) + len(ignored_files)
                           == len(self.info.modified_files))
        self.parse_lines(all_are_ignored)
        if self.changes:
            self.parse_changelog()
            self.parse_file_names()
            self.check_for_empty_description()
            self.check_for_broken_parentheses()
            self.deduce_changelog_locations()
            self.check_file_patterns()
            self.check_line_start()
            # The remaining checks rely on a well-formed ChangeLog part.
            if not self.errors:
                self.check_mentioned_files()
                self.check_for_correct_changelog()
        # parse_changelog() removes every subject PR that is repeated in
        # the ChangeLog part; whatever is left was only in the subject.
        if self.subject_prs:
            self.errors.append(Error('PR %s in subject but not in changelog' %
                                     ', '.join(self.subject_prs), self.info.lines[0]))
 387
 388     @property
 389     def success(self):
 390         return not self.errors
 391
 392     @property
 393     def new_files(self):
 394         return [x[0] for x in self.info.modified_files if x[1] == 'A']
 395
 396     @classmethod
 397     def is_changelog_filename(cls, path, allow_suffix=False):
 398         basename = os.path.basename(path)
 399         if basename == 'ChangeLog':
 400             return True
 401         elif allow_suffix and basename.startswith('ChangeLog'):
 402             return True
 403         else:
 404             return False
 405
 406     def find_changelog_location(self, name):
 407         if name.startswith('\t'):
 408             name = name[1:]
 409         if name.endswith(':'):
 410             name = name[:-1]
 411         if name.endswith('/'):
 412             name = name[:-1]
 413         return name if name in self.changelog_locations else None
 414
 415     @classmethod
 416     def format_git_author(cls, author):
 417         assert '<' in author
 418         return author.replace('<', ' <')
 419
 420     @classmethod
 421     def parse_git_name_status(cls, string):
 422         modified_files = []
 423         for entry in string.split('\n'):
 424             parts = entry.split('\t')
 425             t = parts[0]
 426             if t == 'A' or t == 'D' or t == 'M':
 427                 modified_files.append((parts[1], t))
 428             elif t.startswith('R'):
 429                 modified_files.append((parts[1], 'D'))
 430                 modified_files.append((parts[2], 'A'))
 431         return modified_files
 432
 433     def init_changelog_locations(self, ref_name):
 434         self.changelog_locations = list(default_changelog_locations)
 435         if ref_name:
 436             version = sys.maxsize
 437             if 'releases/gcc-' in ref_name:
 438                 version = int(ref_name.split('-')[-1])
 439             if version >= 12:
 440                 # HSA and BRIG were removed in GCC 12
 441                 self.changelog_locations.remove('gcc/brig')
 442                 self.changelog_locations.remove('libhsail-rt')
 443
 444     def parse_lines(self, all_are_ignored):
 445         body = self.info.lines
 446
 447         for i, b in enumerate(body):
 448             if not b:
 449                 continue
 450             if (changelog_regex.match(b) or self.find_changelog_location(b)
 451                     or star_prefix_regex.match(b) or pr_regex.match(b)
 452                     or dr_regex.match(b) or author_line_regex.match(b)
 453                     or b.lower().startswith(CO_AUTHORED_BY_PREFIX)):
 454                 self.changes = body[i:]
 455                 return
 456         if not all_are_ignored:
 457             self.errors.append(Error('cannot find a ChangeLog location in '
 458                                      'message'))
 459
    def parse_changelog(self):
        """Split self.changes into ChangeLogEntry objects.

        Entries are appended to self.changelog_entries.  Along the way the
        top-level authors and PRs, the co-authors and the cherry-pick hash
        are collected, PRs found here are removed from self.subject_prs,
        and formatting problems are recorded in self.errors.
        """
        # Entry the current line belongs to; None while no entry is open.
        last_entry = None
        # Set once an entry without an explicit location has been created.
        will_deduce = False
        for line in self.changes:
            if not line:
                # An empty line closes the current entry once an entry
                # whose location must be deduced has been seen.
                if last_entry and will_deduce:
                    last_entry = None
                continue
            if line != line.rstrip():
                self.errors.append(Error('trailing whitespace', line))
            if len(line.replace('\t', ' ' * TAB_WIDTH)) > LINE_LIMIT:
                # support long filenames
                # (a line of the form "\t* <no spaces>:" may be longer)
                if not line.startswith('\t* ') or not line.endswith(':') or ' ' in line[3:-1]:
                    self.errors.append(Error('line exceeds %d character limit'
                                             % LINE_LIMIT, line))
            m = changelog_regex.match(line)
            if m:
                # Explicit "dir/ChangeLog:" header.
                last_entry = ChangeLogEntry(m.group(1).rstrip('/'),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            elif self.find_changelog_location(line):
                # A bare location name also starts a new entry.
                last_entry = ChangeLogEntry(self.find_changelog_location(line),
                                            self.top_level_authors,
                                            self.top_level_prs)
                self.changelog_entries.append(last_entry)
            else:
                # Classify the line first: author line, PR/DR line or
                # something else (both variables stay None).
                author_tuple = None
                pr_line = None
                if author_line_regex.match(line):
                    m = author_line_regex.match(line)
                    author_tuple = (m.group('name'), m.group('datetime'))
                elif additional_author_regex.match(line):
                    m = additional_author_regex.match(line)
                    if len(m.group('spaces')) != 4:
                        msg = 'additional author must be indented with '\
                              'one tab and four spaces'
                        self.errors.append(Error(msg, line))
                    else:
                        author_tuple = (m.group('name'), None)
                elif pr_regex.match(line):
                    m = pr_regex.match(line)
                    component = m.group('component')
                    pr = m.group('pr')
                    if not component:
                        self.errors.append(Error('missing PR component', line))
                        continue
                    # the captured component includes the trailing slash
                    elif not component[:-1] in bug_components:
                        self.errors.append(Error('invalid PR component', line))
                        continue
                    else:
                        pr_line = line.lstrip()
                    # A PR repeated here is no longer "subject only".
                    if pr in self.subject_prs:
                        self.subject_prs.remove(pr)
                elif dr_regex.match(line):
                    pr_line = line.lstrip()

                # Trailers and cherry-pick notes are consumed here and
                # never become part of an entry.
                lowered_line = line.lower()
                if lowered_line.startswith(CO_AUTHORED_BY_PREFIX):
                    name = line[len(CO_AUTHORED_BY_PREFIX):]
                    author = self.format_git_author(name)
                    self.co_authors.append(author)
                    continue
                elif lowered_line.startswith(REVIEW_PREFIXES):
                    continue
                else:
                    m = cherry_pick_regex.search(line)
                    if m:
                        commit = m.group('hash')
                        if self.cherry_pick_commit:
                            msg = 'multiple cherry pick lines'
                            self.errors.append(Error(msg, line))
                        else:
                            self.cherry_pick_commit = commit
                        continue

                # ChangeLog name will be deduced later
                if not last_entry:
                    if author_tuple:
                        self.top_level_authors.append(author_tuple)
                        continue
                    elif pr_line:
                        # append to top_level_prs only when we haven't met
                        # a ChangeLog entry
                        if (pr_line not in self.top_level_prs
                                and not self.changelog_entries):
                            self.top_level_prs.append(pr_line)
                        continue
                    else:
                        last_entry = ChangeLogEntry(None,
                                                    self.top_level_authors,
                                                    self.top_level_prs)
                        self.changelog_entries.append(last_entry)
                        will_deduce = True
                elif author_tuple:
                    if not last_entry.contains_author(author_tuple[0]):
                        last_entry.author_lines.append(author_tuple)
                    continue

                # From here on the line belongs to last_entry.
                if not line.startswith('\t'):
                    err = Error('line should start with a tab', line)
                    self.errors.append(err)
                elif pr_line:
                    last_entry.prs.append(pr_line)
                else:
                    m = star_prefix_regex.match(line)
                    if m:
                        # Inside unclosed parentheses a leading '*' may be
                        # part of a name, so the spacing rule is not
                        # enforced there.
                        if (len(m.group('spaces')) != 1 and
                                not last_entry.parentheses_stack):
                            msg = 'one space should follow asterisk'
                            self.errors.append(Error(msg, line))
                        else:
                            content = m.group('content')
                            parts = content.split(':')
                            if len(parts) > 1:
                                for needle in ('()', '[]', '<>'):
                                    if ' ' + needle in parts[0]:
                                        msg = f'empty group "{needle}" found'
                                        self.errors.append(Error(msg, line))
                            last_entry.lines.append(line)
                            self.process_parentheses(last_entry, line)
                    else:
                        if last_entry.is_empty:
                            msg = 'first line should start with a tab, ' \
                                  'an asterisk and a space'
                            self.errors.append(Error(msg, line))
                        else:
                            last_entry.lines.append(line)
                            self.process_parentheses(last_entry, line)
 589
 590     def process_parentheses(self, last_entry, line):
 591         for c in line:
 592             if c == '(':
 593                 last_entry.parentheses_stack.append(line)
 594             elif c == ')':
 595                 if not last_entry.parentheses_stack:
 596                     msg = 'bad wrapping of parenthesis'
 597                     self.errors.append(Error(msg, line))
 598                 else:
 599                     del last_entry.parentheses_stack[-1]
 600
 601     def parse_file_names(self):
 602         for entry in self.changelog_entries:
 603             entry.parse_file_names()
 604
 605     def check_file_patterns(self):
 606         for entry in self.changelog_entries:
 607             for pattern in entry.file_patterns:
 608                 name = os.path.join(entry.folder, pattern)
 609                 if not [name.startswith(pr) for pr in wildcard_prefixes]:
 610                     msg = 'unsupported wildcard prefix'
 611                     self.errors.append(Error(msg, name))
 612
 613     def check_for_empty_description(self):
 614         for entry in self.changelog_entries:
 615             for i, line in enumerate(entry.lines):
 616                 if (item_empty_regex.match(line) and
 617                     (i == len(entry.lines) - 1
 618                      or not entry.lines[i+1].strip()
 619                      or item_parenthesis_regex.match(entry.lines[i+1]))):
 620                     msg = 'missing description of a change'
 621                     self.errors.append(Error(msg, line))
 622
 623     def check_for_broken_parentheses(self):
 624         for entry in self.changelog_entries:
 625             if entry.parentheses_stack:
 626                 msg = 'bad parentheses wrapping'
 627                 self.errors.append(Error(msg, entry.parentheses_stack[-1]))
 628
 629     def check_line_start(self):
 630         for entry in self.changelog_entries:
 631             for line in entry.lines:
 632                 if line.startswith('\t '):
 633                     msg = 'extra space after tab'
 634                     self.errors.append(Error(msg, line))
 635
 636     def get_file_changelog_location(self, changelog_file):
 637         for file in self.info.modified_files:
 638             if file[0] == changelog_file:
 639                 # root ChangeLog file
 640                 return ''
 641             index = file[0].find('/' + changelog_file)
 642             if index != -1:
 643                 return file[0][:index]
 644         return None
 645
 646     def deduce_changelog_locations(self):
 647         for entry in self.changelog_entries:
 648             if entry.folder is None:
 649                 changelog = None
 650                 for file in entry.files:
 651                     location = self.get_file_changelog_location(file)
 652                     if (location == ''
 653                        or (location and location in self.changelog_locations)):
 654                         if changelog and changelog != location:
 655                             msg = 'could not deduce ChangeLog file, ' \
 656                                   'not unique location'
 657                             self.errors.append(Error(msg))
 658                             return
 659                         changelog = location
 660                 if changelog is not None:
 661                     entry.folder = changelog
 662                 else:
 663                     msg = 'could not deduce ChangeLog file'
 664                     self.errors.append(Error(msg))
 665
 666     @classmethod
 667     def in_ignored_location(cls, path):
 668         for ignored in ignored_prefixes:
 669             if path.startswith(ignored):
 670                 return True
 671         return False
 672
 673     def get_changelog_by_path(self, path):
 674         components = path.split('/')
 675         while components:
 676             if '/'.join(components) in self.changelog_locations:
 677                 break
 678             components = components[:-1]
 679         return '/'.join(components)
 680
    def check_mentioned_files(self):
        """Compare the files named in the ChangeLog with the changed files.

        Errors are recorded for files that are mentioned but unchanged,
        mentioned more than once, changed but not mentioned, and for
        wildcard patterns that match no changed file.  A new file that is
        not mentioned is added to the matching entry automatically (with a
        warning) unless it lies in the top-level folder; files in ignored
        locations need not be mentioned.
        """
        # NOTE(review): as written this assertion always holds, because
        # the list is built without a filter - presumably it was meant to
        # verify that every entry has a folder; confirm.
        folder_count = len([x.folder for x in self.changelog_entries])
        assert folder_count == len(self.changelog_entries)

        # Full paths (folder + name) of everything the ChangeLog mentions.
        mentioned_files = set()
        mentioned_patterns = []
        used_patterns = set()
        for entry in self.changelog_entries:
            if not entry.files and not entry.file_patterns:
                msg = 'no files mentioned for ChangeLog in directory'
                self.errors.append(Error(msg, entry.folder))
            assert not entry.folder.endswith('/')
            for file in entry.files:
                if not self.is_changelog_filename(file):
                    item = os.path.join(entry.folder, file)
                    if item in mentioned_files:
                        msg = 'same file specified multiple times'
                        self.errors.append(Error(msg, file))
                    else:
                        mentioned_files.add(item)
            for pattern in entry.file_patterns:
                mentioned_patterns.append(os.path.join(entry.folder, pattern))

        # ChangeLog files themselves never take part in the comparison.
        cand = [x[0] for x in self.info.modified_files
                if not self.is_changelog_filename(x[0])]
        changed_files = set(cand)
        for file in sorted(mentioned_files - changed_files):
            msg = 'unchanged file mentioned in a ChangeLog'
            # Suggest the most similar changed file, if there is one.
            candidates = difflib.get_close_matches(file, changed_files, 1)
            details = None
            if candidates:
                msg += f' (did you mean "{candidates[0]}"?)'
                details = '\n'.join(difflib.Differ().compare([file], [candidates[0]])).rstrip()
            self.errors.append(Error(msg, file, details))
        # Maps an entry folder to the files auto-added to that entry.
        auto_add_warnings = defaultdict(list)
        for file in sorted(changed_files - mentioned_files):
            if not self.in_ignored_location(file):
                if file in self.new_files:
                    changelog_location = self.get_changelog_by_path(file)
                    # Python2: we cannot use next(filter(...))
                    entries = filter(lambda x: x.folder == changelog_location,
                                     self.changelog_entries)
                    entries = list(entries)
                    entry = entries[0] if entries else None
                    if not entry:
                        # No entry exists for that location yet, so one is
                        # created for the new file.
                        prs = self.top_level_prs
                        if not prs:
                            # if all ChangeLog entries have identical PRs
                            # then use them
                            if self.changelog_entries:
                                prs = self.changelog_entries[0].prs
                                for entry in self.changelog_entries:
                                    if entry.prs != prs:
                                        prs = []
                                        break
                        entry = ChangeLogEntry(changelog_location,
                                               self.top_level_authors,
                                               prs)
                        self.changelog_entries.append(entry)
                    # strip prefix of the file
                    assert file.startswith(entry.folder)
                    # do not allow auto-addition of New files
                    # for the top-level folder
                    if entry.folder:
                        file = file[len(entry.folder):].lstrip('/')
                        entry.lines.append('\t* %s: New file.' % file)
                        entry.files.append(file)
                        auto_add_warnings[entry.folder].append(file)
                    else:
                        msg = 'new file in the top-level folder not mentioned in a ChangeLog'
                        self.errors.append(Error(msg, file))
                else:
                    # A changed file may be covered by a wildcard pattern;
                    # the first matching pattern is marked as used.
                    used_pattern = [p for p in mentioned_patterns
                                    if file.startswith(p)]
                    used_pattern = used_pattern[0] if used_pattern else None
                    if used_pattern:
                        used_patterns.add(used_pattern)
                    else:
                        msg = 'changed file not mentioned in a ChangeLog'
                        self.errors.append(Error(msg, file))

        for pattern in mentioned_patterns:
            if pattern not in used_patterns:
                error = "pattern doesn't match any changed files"
                self.errors.append(Error(error, pattern))
        # One warning per folder: the file itself if there is only one,
        # otherwise the number of files.
        for entry, val in auto_add_warnings.items():
            if len(val) == 1:
                self.warnings.append(f"Auto-added new file '{entry}/{val[0]}'")
            else:
                self.warnings.append(f"Auto-added {len(val)} new files in '{entry}'")
 771
 772     def check_for_correct_changelog(self):
 773         for entry in self.changelog_entries:
 774             for file in entry.files:
 775                 full_path = os.path.join(entry.folder, file)
 776                 changelog_location = self.get_changelog_by_path(full_path)
 777                 if changelog_location != entry.folder:
 778                     msg = 'wrong ChangeLog location "%s", should be "%s"'
 779                     err = Error(msg % (entry.folder, changelog_location), file)
 780                     self.errors.append(err)
 781
 782     @classmethod
 783     def format_authors_in_changelog(cls, authors, timestamp, prefix=''):
 784         output = ''
 785         for i, author in enumerate(authors):
 786             if i == 0:
 787                 output += '%s%s  %s\n' % (prefix, timestamp, author)
 788             else:
 789                 output += '%s\t    %s\n' % (prefix, author)
 790         output += '\n'
 791         return output
 792
 793     def to_changelog_entries(self, use_commit_ts=False):
 794         current_timestamp = self.info.date.strftime(DATE_FORMAT)
 795         for entry in self.changelog_entries:
 796             output = ''
 797             timestamp = entry.datetime
 798             if self.revert_commit:
 799                 timestamp = current_timestamp
 800                 orig_date = self.original_info.date
 801                 current_timestamp = orig_date.strftime(DATE_FORMAT)
 802             elif self.cherry_pick_commit:
 803                 info = (self.commit_to_info_hook
 804                         and self.commit_to_info_hook(self.cherry_pick_commit))
 805                 # it can happen that it is a cherry-pick for a different
 806                 # repository
 807                 if info:
 808                     timestamp = info.date.strftime(DATE_FORMAT)
 809                 else:
 810                     if self.commit_to_info_hook:
 811                         self.warnings.append(f"Cherry-picked commit not found: '{self.cherry_pick_commit}'")
 812                     else:
 813                         self.warnings.append(f"Invoked script can not obtain info about "
 814                                              f"cherry-picked commits such as '{self.revert_commit}'")
 815                     timestamp = current_timestamp
 816             elif not timestamp or use_commit_ts:
 817                 timestamp = current_timestamp
 818             authors = entry.authors if entry.authors else [self.info.author]
 819             # add Co-Authored-By authors to all ChangeLog entries
 820             for author in self.co_authors:
 821                 if author not in authors:
 822                     authors.append(author)
 823
 824             if self.cherry_pick_commit or self.revert_commit:
 825                 original_author = self.original_info.author
 826                 output += self.format_authors_in_changelog([original_author],
 827                                                            current_timestamp)
 828                 if self.revert_commit:
 829                     output += '\tRevert:\n'
 830                 else:
 831                     output += '\tBackported from master:\n'
 832                 output += self.format_authors_in_changelog(authors,
 833                                                            timestamp, '\t')
 834             else:
 835                 output += self.format_authors_in_changelog(authors, timestamp)
 836             for pr in entry.prs:
 837                 output += '\t%s\n' % pr
 838             for line in entry.lines:
 839                 output += line + '\n'
 840             yield (entry.folder, output.rstrip())
 841
 842     def print_output(self):
 843         for entry, output in self.to_changelog_entries():
 844             print('------ %s/ChangeLog ------ ' % entry)
 845             print(output)
 846
 847     def print_errors(self):
 848         print('Errors:')
 849         for error in self.errors:
 850             print(error)
 851
 852     def print_warnings(self):
 853         if self.warnings:
 854             print('Warnings:')
 855             for warning in self.warnings:
 856                 print(warning)
 857
 858     def check_commit_email(self):
 859         # Parse 'Martin Liska  <mliska@suse.cz>'
 860         email = self.info.author.split(' ')[-1].strip('<>')
 861
 862         # Verify that all characters are ASCII
 863         # TODO: Python 3.7 provides a nicer function: isascii
 864         if len(email) != len(email.encode()):
 865             self.errors.append(Error(f'non-ASCII characters in git commit email address ({email})'))