3 # This file is part of GCC.
5 # GCC is free software; you can redistribute it and/or modify it under
6 # the terms of the GNU General Public License as published by the Free
7 # Software Foundation; either version 3, or (at your option) any later
10 # GCC is distributed in the hope that it will be useful, but WITHOUT ANY
11 # WARRANTY; without even the implied warranty of MERCHANTABILITY or
12 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 # You should have received a copy of the GNU General Public License
16 # along with GCC; see the file COPYING3. If not see
17 # <http://www.gnu.org/licenses/>. */
24 default_changelog_locations
= {
28 'contrib/header-tools',
62 'libgcc/config/avr/libf7',
63 'libgcc/config/libbid',
134 'gcc/go/gofrontend/',
135 'gcc/testsuite/gdc.test/',
136 'gcc/testsuite/go.test/test/',
138 'libphobos/libdruntime/',
143 wildcard_prefixes
= {
145 'libstdc++-v3/doc/html/',
146 'libstdc++-v3/testsuite/'
155 author_line_regex
= \
156 re
.compile(r
'^(?P<datetime>\d{4}-\d{2}-\d{2})\ {2}(?P<name>.* <.*>)')
157 additional_author_regex
= re
.compile(r
'^\t(?P<spaces>\ *)?(?P<name>.* <.*>)')
# Matches a ChangeLog location header: an optional "for "/"For " prefix, a
# path prefix captured in group 1, the word "ChangeLog" and an optional colon.
# NOTE: "+-/" inside the character class is a range, so it accepts the
# characters "+", ",", "-", "." and "/".
changelog_regex = re.compile(r'^(?:[fF]or +)?([a-z0-9+-/]*)ChangeLog:?')
# Matches "PR <component>/<number>" where "PR" is at the start of the text or
# preceded by a non-word character; the number has 4 to 7 digits.
subject_pr_regex = re.compile(r'(^|\W)PR\s+(?P<component>[a-zA-Z+-]+)/(?P<pr>\d{4,7})')
# Matches a bracketed PR number without a component, e.g. "[PR12345]" or
# "(PR12345)"; the opening and closing bracket kinds are matched independently.
subject_pr2_regex = re.compile(r'[(\[]PR\s*(?P<pr>\d{4,7})[)\]]')
# Matches a tab-indented "PR component/number" line.  The component group is
# optional and, when present, includes its trailing slash.
pr_regex = re.compile(r'\tPR (?P<component>[a-z+-]+\/)?(?P<pr>[0-9]+)$')
# Matches a tab-indented "DR number" line; group 1 is the number.
dr_regex = re.compile(r'\tDR ([0-9]+)$')
# Matches a tab followed by an asterisk; captures the run of spaces after the
# asterisk and the remaining content of the line separately.
star_prefix_regex = re.compile(r'\t\*(?P<spaces>\ *)(?P<content>.*)')
# Matches the first character that terminates a file list: "[", "<", "(" or ":".
end_of_location_regex = re.compile(r'[\[<(:]')
# Matches an item that ends right after "(name):" (only whitespace follows),
# optionally preceded by "* <file> ".
item_empty_regex = re.compile(r'\t(\* \S+ )?\(\S+\):\s*$')
# Matches a line that starts a new item: a tab followed by either an asterisk
# or "(name):".
item_parenthesis_regex = re.compile(r'\t(\*|\(\S+\):)')
# Matches "This reverts commit <hash>" followed by exactly one more character
# at the end of the line (the "." is unescaped, so any character is accepted).
revert_regex = re.compile(r'This reverts commit (?P<hash>\w+).$')
# Matches "cherry picked from commit <hash>" anywhere it is searched for.
cherry_pick_regex = re.compile(r'cherry picked from commit (?P<hash>\w+)')
172 CO_AUTHORED_BY_PREFIX
= 'co-authored-by: '
174 REVIEW_PREFIXES
= ('reviewed-by: ', 'reviewed-on: ', 'signed-off-by: ',
175 'acked-by: ', 'tested-by: ', 'reported-by: ',
177 DATE_FORMAT
= '%Y-%m-%d'
180 def decode_path(path
):
181 # When core.quotepath is true (default value), utf8 chars are encoded like:
182 # "b/ko\304\215ka.txt"
184 # The upstream bug is fixed:
185 # https://github.com/gitpython-developers/GitPython/issues/1099
187 # but we still need a workaround for older versions of the library.
188 # Please take a look at the explanation of the transformation:
189 # https://stackoverflow.com/questions/990169/how-do-convert-unicode-escape-sequences-to-unicode-characters-in-a-python-string
191 if path
.startswith('"') and path
.endswith('"'):
192 return (path
.strip('"').encode('utf8').decode('unicode-escape')
193 .encode('latin-1').decode('utf8'))
199 def __init__(self
, message
, line
=None):
200 self
.message
= message
206 s
+= ': "%s"' % self
.line
210 class ChangeLogEntry
:
211 def __init__(self
, folder
, authors
, prs
):
213 # The 'list.copy()' function is not available before Python 3.3
214 self
.author_lines
= list(authors
)
215 self
.initial_prs
= list(prs
)
219 self
.file_patterns
= []
220 self
.opened_parentheses
= 0
222 def parse_file_names(self
):
# Whether the content currently processed is between a star prefix and
# the end of the file list: a colon or an open paren.
227 for line
in self
.lines
:
228 # If this line matches the star prefix, start the location
229 # processing on the information that follows the star.
230 # Note that we need to skip macro names that can be in form of:
232 # * config/i386/i386.md (*fix_trunc<mode>_i387_1,
233 # *add<mode>3_ne, *add<mode>3_eq_0, *add<mode>3_ne_0,
234 # *fist<mode>2_<rounding>_1, *<code><mode>3_1):
236 m
= star_prefix_regex
.match(line
)
237 if m
and len(m
.group('spaces')) == 1:
239 line
= m
.group('content')
242 # Strip everything that is not a filename in "line":
243 # entities "(NAME)", cases "<PATTERN>", conditions
244 # "[COND]", entry text (the colon, if present, and
245 # anything that follows it).
246 m
= end_of_location_regex
.search(line
)
248 line
= line
[:m
.start()]
251 # At this point, all that's left is a list of filenames
252 # separated by commas and whitespaces.
253 for file in line
.split(','):
256 if file.endswith('*'):
257 self
.file_patterns
.append(file[:-1])
259 self
.files
.append(file)
263 for author
in self
.author_lines
:
270 return [author_line
[0] for author_line
in self
.author_lines
]
274 return not self
.lines
and self
.prs
== self
.initial_prs
276 def contains_author(self
, author
):
277 for author_lines
in self
.author_lines
:
278 if author_lines
[0] == author
:
284 def __init__(self
, hexsha
, date
, author
, lines
, modified_files
):
289 self
.modified_files
= modified_files
293 def __init__(self
, info
, commit_to_info_hook
=None, ref_name
=None):
294 self
.original_info
= info
298 self
.changelog_entries
= []
300 self
.top_level_authors
= []
302 self
.top_level_prs
= []
303 self
.subject_prs
= set()
304 self
.cherry_pick_commit
= None
305 self
.revert_commit
= None
306 self
.commit_to_info_hook
= commit_to_info_hook
307 self
.init_changelog_locations(ref_name
)
309 # Skip Update copyright years commits
310 if self
.info
.lines
and self
.info
.lines
[0] == 'Update copyright years.':
313 if self
.info
.lines
and len(self
.info
.lines
) > 1 and self
.info
.lines
[1]:
314 self
.errors
.append(Error('Expected empty second line in commit message', info
.lines
[0]))
316 # Identify first if the commit is a Revert commit
317 for line
in self
.info
.lines
:
318 m
= revert_regex
.match(line
)
320 self
.revert_commit
= m
.group('hash')
322 if self
.revert_commit
:
323 self
.info
= self
.commit_to_info_hook(self
.revert_commit
)
325 # The following happens for get_email.py:
329 self
.check_commit_email()
# Extract PR numbers from the subject line
332 # Match either [PRnnnn] / (PRnnnn) or PR component/nnnn
333 if self
.info
.lines
and not self
.revert_commit
:
334 self
.subject_prs
= {m
.group('pr') for m
in subject_pr2_regex
.finditer(info
.lines
[0])}
335 for m
in subject_pr_regex
.finditer(info
.lines
[0]):
336 if not m
.group('component') in bug_components
:
337 self
.errors
.append(Error('invalid PR component in subject', info
.lines
[0]))
338 self
.subject_prs
.add(m
.group('pr'))
340 # Allow complete deletion of ChangeLog files in a commit
341 project_files
= [f
for f
in self
.info
.modified_files
342 if (self
.is_changelog_filename(f
[0], allow_suffix
=True) and f
[1] != 'D')
343 or f
[0] in misc_files
]
344 ignored_files
= [f
for f
in self
.info
.modified_files
345 if self
.in_ignored_location(f
[0])]
346 if len(project_files
) == len(self
.info
.modified_files
):
347 # All modified files are only MISC files
350 err
= 'ChangeLog, DATESTAMP, BASE-VER and DEV-PHASE updates ' \
351 'should be done separately from normal commits\n' \
352 '(note: ChangeLog entries will be automatically ' \
353 'added by a cron job)'
354 self
.errors
.append(Error(err
))
357 all_are_ignored
= (len(project_files
) + len(ignored_files
)
358 == len(self
.info
.modified_files
))
359 self
.parse_lines(all_are_ignored
)
361 self
.parse_changelog()
362 self
.parse_file_names()
363 self
.check_for_empty_description()
364 self
.check_for_broken_parentheses()
365 self
.deduce_changelog_locations()
366 self
.check_file_patterns()
368 self
.check_mentioned_files()
369 self
.check_for_correct_changelog()
371 self
.errors
.append(Error('PR %s in subject but not in changelog' %
372 ', '.join(self
.subject_prs
), self
.info
.lines
[0]))
376 return not self
.errors
380 return [x
[0] for x
in self
.info
.modified_files
if x
[1] == 'A']
383 def is_changelog_filename(cls
, path
, allow_suffix
=False):
384 basename
= os
.path
.basename(path
)
385 if basename
== 'ChangeLog':
387 elif allow_suffix
and basename
.startswith('ChangeLog'):
392 def find_changelog_location(self
, name
):
393 if name
.startswith('\t'):
395 if name
.endswith(':'):
397 if name
.endswith('/'):
399 return name
if name
in self
.changelog_locations
else None
402 def format_git_author(cls
, author
):
404 return author
.replace('<', ' <')
407 def parse_git_name_status(cls
, string
):
409 for entry
in string
.split('\n'):
410 parts
= entry
.split('\t')
412 if t
== 'A' or t
== 'D' or t
== 'M':
413 modified_files
.append((parts
[1], t
))
414 elif t
.startswith('R'):
415 modified_files
.append((parts
[1], 'D'))
416 modified_files
.append((parts
[2], 'A'))
417 return modified_files
419 def init_changelog_locations(self
, ref_name
):
420 self
.changelog_locations
= list(default_changelog_locations
)
422 version
= sys
.maxsize
423 if 'releases/gcc-' in ref_name
:
424 version
= int(ref_name
.split('-')[-1])
426 # HSA and BRIG were removed in GCC 12
427 self
.changelog_locations
.remove('gcc/brig')
428 self
.changelog_locations
.remove('libhsail-rt')
430 def parse_lines(self
, all_are_ignored
):
431 body
= self
.info
.lines
433 for i
, b
in enumerate(body
):
436 if (changelog_regex
.match(b
) or self
.find_changelog_location(b
)
437 or star_prefix_regex
.match(b
) or pr_regex
.match(b
)
438 or dr_regex
.match(b
) or author_line_regex
.match(b
)
439 or b
.lower().startswith(CO_AUTHORED_BY_PREFIX
)):
440 self
.changes
= body
[i
:]
442 if not all_are_ignored
:
443 self
.errors
.append(Error('cannot find a ChangeLog location in '
446 def parse_changelog(self
):
449 for line
in self
.changes
:
451 if last_entry
and will_deduce
:
454 if line
!= line
.rstrip():
455 self
.errors
.append(Error('trailing whitespace', line
))
456 if len(line
.replace('\t', ' ' * TAB_WIDTH
)) > LINE_LIMIT
:
457 # support long filenames
458 if not line
.startswith('\t* ') or not line
.endswith(':') or ' ' in line
[3:-1]:
459 self
.errors
.append(Error('line exceeds %d character limit'
461 m
= changelog_regex
.match(line
)
463 last_entry
= ChangeLogEntry(m
.group(1).rstrip('/'),
464 self
.top_level_authors
,
466 self
.changelog_entries
.append(last_entry
)
467 elif self
.find_changelog_location(line
):
468 last_entry
= ChangeLogEntry(self
.find_changelog_location(line
),
469 self
.top_level_authors
,
471 self
.changelog_entries
.append(last_entry
)
475 if author_line_regex
.match(line
):
476 m
= author_line_regex
.match(line
)
477 author_tuple
= (m
.group('name'), m
.group('datetime'))
478 elif additional_author_regex
.match(line
):
479 m
= additional_author_regex
.match(line
)
480 if len(m
.group('spaces')) != 4:
481 msg
= 'additional author must be indented with '\
482 'one tab and four spaces'
483 self
.errors
.append(Error(msg
, line
))
485 author_tuple
= (m
.group('name'), None)
486 elif pr_regex
.match(line
):
487 m
= pr_regex
.match(line
)
488 component
= m
.group('component')
491 self
.errors
.append(Error('missing PR component', line
))
493 elif not component
[:-1] in bug_components
:
494 self
.errors
.append(Error('invalid PR component', line
))
497 pr_line
= line
.lstrip()
498 if pr
in self
.subject_prs
:
499 self
.subject_prs
.remove(pr
)
500 elif dr_regex
.match(line
):
501 pr_line
= line
.lstrip()
503 lowered_line
= line
.lower()
504 if lowered_line
.startswith(CO_AUTHORED_BY_PREFIX
):
505 name
= line
[len(CO_AUTHORED_BY_PREFIX
):]
506 author
= self
.format_git_author(name
)
507 self
.co_authors
.append(author
)
509 elif lowered_line
.startswith(REVIEW_PREFIXES
):
512 m
= cherry_pick_regex
.search(line
)
514 commit
= m
.group('hash')
515 if self
.cherry_pick_commit
:
516 msg
= 'multiple cherry pick lines'
517 self
.errors
.append(Error(msg
, line
))
519 self
.cherry_pick_commit
= commit
522 # ChangeLog name will be deduced later
525 self
.top_level_authors
.append(author_tuple
)
528 # append to top_level_prs only when we haven't met
530 if (pr_line
not in self
.top_level_prs
531 and not self
.changelog_entries
):
532 self
.top_level_prs
.append(pr_line
)
535 last_entry
= ChangeLogEntry(None,
536 self
.top_level_authors
,
538 self
.changelog_entries
.append(last_entry
)
541 if not last_entry
.contains_author(author_tuple
[0]):
542 last_entry
.author_lines
.append(author_tuple
)
545 if not line
.startswith('\t'):
546 err
= Error('line should start with a tab', line
)
547 self
.errors
.append(err
)
549 last_entry
.prs
.append(pr_line
)
551 m
= star_prefix_regex
.match(line
)
553 if (len(m
.group('spaces')) != 1 and
554 last_entry
.opened_parentheses
== 0):
555 msg
= 'one space should follow asterisk'
556 self
.errors
.append(Error(msg
, line
))
558 content
= m
.group('content')
559 parts
= content
.split(':')
561 for needle
in ('()', '[]', '<>'):
562 if ' ' + needle
in parts
[0]:
563 msg
= f
'empty group "{needle}" found'
564 self
.errors
.append(Error(msg
, line
))
565 last_entry
.lines
.append(line
)
566 self
.process_parentheses(last_entry
, line
)
568 if last_entry
.is_empty
:
569 msg
= 'first line should start with a tab, ' \
570 'an asterisk and a space'
571 self
.errors
.append(Error(msg
, line
))
573 last_entry
.lines
.append(line
)
574 self
.process_parentheses(last_entry
, line
)
576 def process_parentheses(self
, last_entry
, line
):
579 last_entry
.opened_parentheses
+= 1
581 if last_entry
.opened_parentheses
== 0:
582 msg
= 'bad wrapping of parenthesis'
583 self
.errors
.append(Error(msg
, line
))
585 last_entry
.opened_parentheses
-= 1
def parse_file_names(self):
    """Have every collected ChangeLog entry parse its own file names.

    Walks ``self.changelog_entries`` in order and delegates to each
    entry's ``parse_file_names`` method.  Returns nothing.
    """
    for changelog_entry in self.changelog_entries:
        changelog_entry.parse_file_names()
def check_file_patterns(self):
    """Reject wildcard file patterns that are not under an allowed prefix.

    For every pattern recorded in an entry's ``file_patterns`` the pattern
    is joined to the entry's folder; if the resulting path does not start
    with any element of ``wildcard_prefixes`` an
    'unsupported wildcard prefix' error is appended to ``self.errors``.
    """
    # str.startswith accepts a tuple of candidates; build it once.
    allowed_prefixes = tuple(wildcard_prefixes)
    for entry in self.changelog_entries:
        for pattern in entry.file_patterns:
            name = os.path.join(entry.folder, pattern)
            # The previous check tested the truthiness of a list of
            # booleans, which is non-empty (hence truthy) whenever any
            # prefix is configured, so the error could never be reported.
            if not name.startswith(allowed_prefixes):
                msg = 'unsupported wildcard prefix'
                self.errors.append(Error(msg, name))
def check_for_empty_description(self):
    """Flag items that name a changed entity but carry no description.

    A line matching ``item_empty_regex`` is reported with
    'missing description of a change' when it is the last line of its
    entry, or when the following line is blank or starts a new item
    (matches ``item_parenthesis_regex``).
    """
    for entry in self.changelog_entries:
        lines = entry.lines
        last = len(lines) - 1
        for idx, text in enumerate(lines):
            if not item_empty_regex.match(text):
                continue
            if idx != last:
                following = lines[idx + 1]
                # A non-blank continuation line that does not open a new
                # item is the description itself, so nothing is missing.
                if (following.strip()
                        and not item_parenthesis_regex.match(following)):
                    continue
            self.errors.append(Error('missing description of a change', text))
def check_for_broken_parentheses(self):
    """Report entries whose parenthesis counter did not return to zero.

    For each entry in ``self.changelog_entries`` with a non-zero
    ``opened_parentheses`` value, a 'bad parentheses wrapping' error that
    quotes the entry's first line is appended to ``self.errors``.
    """
    unbalanced = [candidate for candidate in self.changelog_entries
                  if candidate.opened_parentheses != 0]
    for broken in unbalanced:
        self.errors.append(Error('bad parentheses wrapping', broken.lines[0]))
615 def get_file_changelog_location(self
, changelog_file
):
616 for file in self
.info
.modified_files
:
617 if file[0] == changelog_file
:
618 # root ChangeLog file
620 index
= file[0].find('/' + changelog_file
)
622 return file[0][:index
]
625 def deduce_changelog_locations(self
):
626 for entry
in self
.changelog_entries
:
629 for file in entry
.files
:
630 location
= self
.get_file_changelog_location(file)
632 or (location
and location
in self
.changelog_locations
)):
633 if changelog
and changelog
!= location
:
634 msg
= 'could not deduce ChangeLog file, ' \
635 'not unique location'
636 self
.errors
.append(Error(msg
))
639 if changelog
is not None:
640 entry
.folder
= changelog
642 msg
= 'could not deduce ChangeLog file'
643 self
.errors
.append(Error(msg
))
646 def in_ignored_location(cls
, path
):
647 for ignored
in ignored_prefixes
:
648 if path
.startswith(ignored
):
652 def get_changelog_by_path(self
, path
):
653 components
= path
.split('/')
655 if '/'.join(components
) in self
.changelog_locations
:
657 components
= components
[:-1]
658 return '/'.join(components
)
660 def check_mentioned_files(self
):
661 folder_count
= len([x
.folder
for x
in self
.changelog_entries
])
662 assert folder_count
== len(self
.changelog_entries
)
664 mentioned_files
= set()
665 mentioned_patterns
= []
666 used_patterns
= set()
667 for entry
in self
.changelog_entries
:
668 if not entry
.files
and not entry
.file_patterns
:
669 msg
= 'no files mentioned for ChangeLog in directory'
670 self
.errors
.append(Error(msg
, entry
.folder
))
671 assert not entry
.folder
.endswith('/')
672 for file in entry
.files
:
673 if not self
.is_changelog_filename(file):
674 item
= os
.path
.join(entry
.folder
, file)
675 if item
in mentioned_files
:
676 msg
= 'same file specified multiple times'
677 self
.errors
.append(Error(msg
, file))
679 mentioned_files
.add(item
)
680 for pattern
in entry
.file_patterns
:
681 mentioned_patterns
.append(os
.path
.join(entry
.folder
, pattern
))
683 cand
= [x
[0] for x
in self
.info
.modified_files
684 if not self
.is_changelog_filename(x
[0])]
685 changed_files
= set(cand
)
686 for file in sorted(mentioned_files
- changed_files
):
687 msg
= 'unchanged file mentioned in a ChangeLog'
688 candidates
= difflib
.get_close_matches(file, changed_files
, 1)
690 msg
+= f
' (did you mean "{candidates[0]}"?)'
691 self
.errors
.append(Error(msg
, file))
692 for file in sorted(changed_files
- mentioned_files
):
693 if not self
.in_ignored_location(file):
694 if file in self
.new_files
:
695 changelog_location
= self
.get_changelog_by_path(file)
696 # Python2: we cannot use next(filter(...))
697 entries
= filter(lambda x
: x
.folder
== changelog_location
,
698 self
.changelog_entries
)
699 entries
= list(entries
)
700 entry
= entries
[0] if entries
else None
702 prs
= self
.top_level_prs
704 # if all ChangeLog entries have identical PRs
706 prs
= self
.changelog_entries
[0].prs
707 for entry
in self
.changelog_entries
:
711 entry
= ChangeLogEntry(changelog_location
,
712 self
.top_level_authors
,
714 self
.changelog_entries
.append(entry
)
715 # strip prefix of the file
716 assert file.startswith(entry
.folder
)
717 file = file[len(entry
.folder
):].lstrip('/')
718 entry
.lines
.append('\t* %s: New file.' % file)
719 entry
.files
.append(file)
721 used_pattern
= [p
for p
in mentioned_patterns
722 if file.startswith(p
)]
723 used_pattern
= used_pattern
[0] if used_pattern
else None
725 used_patterns
.add(used_pattern
)
727 msg
= 'changed file not mentioned in a ChangeLog'
728 self
.errors
.append(Error(msg
, file))
730 for pattern
in mentioned_patterns
:
731 if pattern
not in used_patterns
:
732 error
= "pattern doesn't match any changed files"
733 self
.errors
.append(Error(error
, pattern
))
def check_for_correct_changelog(self):
    """Verify each mentioned file is listed under the right ChangeLog.

    Every file of every entry is joined to the entry's folder and looked
    up with ``self.get_changelog_by_path``.  When the location returned
    differs from the entry's folder, a 'wrong ChangeLog location' error
    naming both folders is appended to ``self.errors``.
    """
    for entry in self.changelog_entries:
        folder = entry.folder
        for name in entry.files:
            expected = self.get_changelog_by_path(os.path.join(folder, name))
            if expected == folder:
                continue
            text = ('wrong ChangeLog location "%s", should be "%s"'
                    % (folder, expected))
            self.errors.append(Error(text, name))
746 def format_authors_in_changelog(cls
, authors
, timestamp
, prefix
=''):
748 for i
, author
in enumerate(authors
):
750 output
+= '%s%s %s\n' % (prefix
, timestamp
, author
)
752 output
+= '%s\t %s\n' % (prefix
, author
)
756 def to_changelog_entries(self
, use_commit_ts
=False):
757 current_timestamp
= self
.info
.date
.strftime(DATE_FORMAT
)
758 for entry
in self
.changelog_entries
:
760 timestamp
= entry
.datetime
761 if self
.revert_commit
:
762 timestamp
= current_timestamp
763 orig_date
= self
.original_info
.date
764 current_timestamp
= orig_date
.strftime(DATE_FORMAT
)
765 elif self
.cherry_pick_commit
:
766 info
= self
.commit_to_info_hook(self
.cherry_pick_commit
)
767 # it can happen that it is a cherry-pick for a different
770 timestamp
= info
.date
.strftime(DATE_FORMAT
)
772 timestamp
= current_timestamp
773 elif not timestamp
or use_commit_ts
:
774 timestamp
= current_timestamp
775 authors
= entry
.authors
if entry
.authors
else [self
.info
.author
]
776 # add Co-Authored-By authors to all ChangeLog entries
777 for author
in self
.co_authors
:
778 if author
not in authors
:
779 authors
.append(author
)
781 if self
.cherry_pick_commit
or self
.revert_commit
:
782 original_author
= self
.original_info
.author
783 output
+= self
.format_authors_in_changelog([original_author
],
785 if self
.revert_commit
:
786 output
+= '\tRevert:\n'
788 output
+= '\tBackported from master:\n'
789 output
+= self
.format_authors_in_changelog(authors
,
792 output
+= self
.format_authors_in_changelog(authors
, timestamp
)
794 output
+= '\t%s\n' % pr
795 for line
in entry
.lines
:
796 output
+= line
+ '\n'
797 yield (entry
.folder
, output
.rstrip())
799 def print_output(self
):
800 for entry
, output
in self
.to_changelog_entries():
801 print('------ %s/ChangeLog ------ ' % entry
)
804 def print_errors(self
):
806 for error
in self
.errors
:
def check_commit_email(self):
    """Record an error when the commit author's email is not pure ASCII.

    The email is taken as the last space-separated token of
    ``self.info.author`` with surrounding angle brackets stripped.
    """
    # The author string looks like 'Martin Liska <mliska@suse.cz>'.
    last_token = self.info.author.split(' ')[-1]
    email = last_token.strip('<>')

    # The encoded form is longer than the string exactly when a
    # non-ASCII character is present.
    # TODO: Python 3.7 provides a nicer function: isascii
    if len(email.encode()) == len(email):
        return
    self.errors.append(Error(f'non-ASCII characters in git commit email address ({email})'))