contrib/mklog.py

   1 #!/usr/bin/env python3
   2
   3 # Copyright (C) 2020-2023 Free Software Foundation, Inc.
   4 #
   5 # This file is part of GCC.
   6 #
   7 # GCC is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 3, or (at your option)
  10 # any later version.
  11 #
  12 # GCC is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with GCC; see the file COPYING.  If not, write to
  19 # the Free Software Foundation, 51 Franklin Street, Fifth Floor,
  20 # Boston, MA 02110-1301, USA.
  21
# This script parses a .diff file generated with 'diff -up' or 'diff -cp'
# and generates a skeleton ChangeLog entry for it. It does not try to be
# too smart when parsing function names, but it produces a reasonable
# approximation.
  26 #
  27 # Author: Martin Liska <mliska@suse.cz>
  28
  29 import argparse
  30 import datetime
  31 import json
  32 import os
  33 import re
  34 import subprocess
  35 import sys
  36 from itertools import takewhile
  37
  38 import requests
  39
  40 from unidiff import PatchSet
  41
# Maximum width, in columns, of a one-line ChangeLog entry; longer entries
# are wrapped onto a second line by append_changelog_line.
LINE_LIMIT = 100
# Number of columns a tab character is counted as when measuring an entry
# against LINE_LIMIT.
TAB_WIDTH = 8
  44
  45 # Initial commit:
  46 #   +--------------------------------------------------+
  47 #   | gccrs: Some title                                |
  48 #   |                                                  | This is the "start"
  49 #   | This is some text explaining the commit.         |
  50 #   | There can be several lines.                      |
  51 #   |                                                  |<------------------->
  52 #   | Signed-off-by: My Name <my@mail.com>             | This is the "end"
  53 #   +--------------------------------------------------+
  54 #
  55 # Results in:
  56 #   +--------------------------------------------------+
  57 #   | gccrs: Some title                                |
  58 #   |                                                  |
  59 #   | This is some text explaining the commit.         | This is the "start"
  60 #   | There can be several lines.                      |
  61 #   |                                                  |<------------------->
  62 #   | gcc/rust/ChangeLog:                              |
  63 #   |                                                  | This is the generated
  64 #   |         * some_file (bla):                       | ChangeLog part
  65 #   |         (foo):                                   |
  66 #   |                                                  |<------------------->
  67 #   | Signed-off-by: My Name <my@mail.com>             | This is the "end"
  68 #   +--------------------------------------------------+
  69
  70 # this regex matches the first line of the "end" in the initial commit message
FIRST_LINE_OF_END_RE = re.compile('(?i)^(signed-off-by:|co-authored-by:|#)')

# A comment leader ('//', '/*', or one of 'C', 'c', '*', '!') followed by
# whitespace and 'PR <component>/<number>'.
pr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<pr>PR [a-z+-]+\/[0-9]+)')
# Splits 'PR <component>/<number>' into its 'comp' and 'num' parts.
prnum_regex = re.compile(r'PR (?P<comp>[a-z+-]+)/(?P<num>[0-9]+)')
# Same comment leaders as pr_regex, followed by 'DR <number>'.
dr_regex = re.compile(r'(\/(\/|\*)|[Cc*!])\s+(?P<dr>DR [0-9]+)')
# '{' followed by whitespace and 'dg-error' or 'dg-warning'.
dg_regex = re.compile(r'{\s+dg-(error|warning)')
# 'pr'/'PR' followed by at least four digits, at the start of the name or
# after a non-alphanumeric character (underscore included).
pr_filename_regex = re.compile(r'(^|[\W_])[Pp][Rr](?P<pr>\d{4,})')
# A line whose first character is alphanumeric, '_' or '#'; captures the
# whole line.
identifier_regex = re.compile(r'^([a-zA-Z0-9_#].*)')
# A line that starts with '/*'.
comment_regex = re.compile(r'^\/\*')
# 'class', 'struct', 'union' or 'enum' at the start of a line, an optional
# 'GTY(...)' marker, then the type name (group 3).
struct_regex = re.compile(r'^(class|struct|union|enum)\s+'
                          r'(GTY\(.*\)\s+)?([a-zA-Z0-9_]+)')
# '#define NAME' or '#undef NAME'; the name is group 2.
macro_regex = re.compile(r'#\s*(define|undef)\s+([a-zA-Z0-9_]+)')
# 'DEF...(' at the start of a line; captures the first argument.
super_macro_regex = re.compile(r'^DEF[A-Z0-9_]+\s*\(([a-zA-Z0-9_]+)')
# An identifier-like token followed by '(' and a character other than '*'.
fn_regex = re.compile(r'([a-zA-Z_][^()\s]*)\s*\([^*]')
# An innermost '<...>' group (no nested angle brackets).
template_and_param_regex = re.compile(r'<[^<>]*>')
# '(define...' followed by whitespace and a double-quoted string, whose
# content is captured.
md_def_regex = re.compile(r'\(define.*\s+"(.*)"')
# Bugzilla REST query; '%s' is replaced with the bug id.
bugzilla_url = 'https://gcc.gnu.org/bugzilla/rest.cgi/bug?id=%s&' \
               'include_fields=summary,component'

# File extensions for which function names are extracted from hunks.
function_extensions = {'.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def', '.md'}

# NB: Makefile.in isn't listed as it's not always generated.
generated_files = {'aclocal.m4', 'config.h.in', 'configure'}

help_message = """\
Generate ChangeLog template for PATCH.
PATCH must be generated using diff(1)'s -up or -cp options
(or their equivalent in git).
"""

# NOTE: despite its name, script_folder is the resolved path of this script
# file itself; root is therefore the parent of the directory containing it.
script_folder = os.path.realpath(__file__)
root = os.path.dirname(os.path.dirname(script_folder))
 103
 104
 105 def find_changelog(path):
 106     folder = os.path.split(path)[0]
 107     while True:
 108         if os.path.exists(os.path.join(root, folder, 'ChangeLog')):
 109             return folder
 110         folder = os.path.dirname(folder)
 111         if folder == '':
 112             return folder
 113     raise AssertionError()
 114
 115
 116 def extract_function_name(line):
 117     if comment_regex.match(line):
 118         return None
 119     m = struct_regex.search(line)
 120     if m:
 121         # Struct declaration
 122         return m.group(1) + ' ' + m.group(3)
 123     m = macro_regex.search(line)
 124     if m:
 125         # Macro definition
 126         return m.group(2)
 127     m = super_macro_regex.search(line)
 128     if m:
 129         # Supermacro
 130         return m.group(1)
 131     m = fn_regex.search(line)
 132     if m:
 133         # Discard template and function parameters.
 134         fn = m.group(1)
 135         fn = re.sub(template_and_param_regex, '', fn)
 136         return fn.rstrip()
 137     return None
 138
 139
 140 def try_add_function(functions, line):
 141     fn = extract_function_name(line)
 142     if fn and fn not in functions:
 143         functions.append(fn)
 144     return bool(fn)
 145
 146
 147 def sort_changelog_files(changed_file):
 148     return (changed_file.is_added_file, changed_file.is_removed_file)
 149
 150
 151 def get_pr_titles(prs):
 152     output = []
 153     for idx, pr in enumerate(prs):
 154         pr_id = pr.split('/')[-1]
 155         r = requests.get(bugzilla_url % pr_id)
 156         bugs = r.json()['bugs']
 157         if len(bugs) == 1:
 158             prs[idx] = 'PR %s/%s' % (bugs[0]['component'], pr_id)
 159             out = '%s - %s\n' % (prs[idx], bugs[0]['summary'])
 160             if out not in output:
 161                 output.append(out)
 162     if output:
 163         output.append('')
 164     return '\n'.join(output)
 165
 166
 167 def append_changelog_line(out, relative_path, text):
 168     line = f'\t* {relative_path}:'
 169     if len(line.replace('\t', ' ' * TAB_WIDTH) + ' ' + text) <= LINE_LIMIT:
 170         out += f'{line} {text}\n'
 171     else:
 172         out += f'{line}\n'
 173         out += f'\t{text}\n'
 174     return out
 175
 176
 177 def get_rel_path_if_prefixed(path, folder):
 178     if path.startswith(folder):
 179         return path[len(folder):].lstrip('/')
 180     else:
 181         return path
 182
 183
 184 def generate_changelog(data, no_functions=False, fill_pr_titles=False,
 185                        additional_prs=None):
 186     global prs
 187     prs = []
 188
 189     changelogs = {}
 190     changelog_list = []
 191     out = ''
 192     diff = PatchSet(data)
 193
 194     if additional_prs:
 195         for apr in additional_prs:
 196             if not apr.startswith('PR ') and '/' in apr:
 197                 apr = 'PR ' + apr
 198             if apr not in prs:
 199                 prs.append(apr)
 200     for file in diff:
 201         # skip files that can't be parsed
 202         if file.path == '/dev/null':
 203             continue
 204         changelog = find_changelog(file.path)
 205         if changelog not in changelogs:
 206             changelogs[changelog] = []
 207             changelog_list.append(changelog)
 208         changelogs[changelog].append(file)
 209
 210         # Extract PR entries from newly added tests
 211         if 'testsuite' in file.path and file.is_added_file:
 212             # Only search first ten lines as later lines may
 213             # contains commented code which a note that it
 214             # has not been tested due to a certain PR or DR.
 215             this_file_prs = []
 216             hunks = list(file)
 217             if hunks:
 218                 for line in hunks[0][0:10]:
 219                     m = pr_regex.search(line.value)
 220                     if m:
 221                         pr = m.group('pr')
 222                         if pr not in prs:
 223                             prs.append(pr)
 224                             this_file_prs.append(pr.split('/')[-1])
 225                     else:
 226                         m = dr_regex.search(line.value)
 227                         if m:
 228                             dr = m.group('dr')
 229                             if dr not in prs:
 230                                 prs.append(dr)
 231                                 this_file_prs.append(dr.split('/')[-1])
 232                         elif dg_regex.search(line.value):
 233                             # Found dg-warning/dg-error line
 234                             break
 235
 236             # PR number in the file name
 237             fname = os.path.basename(file.path)
 238             m = pr_filename_regex.search(fname)
 239             if m:
 240                 pr = m.group('pr')
 241                 pr2 = 'PR ' + pr
 242                 if pr not in this_file_prs and pr2 not in prs:
 243                     prs.append(pr2)
 244
 245     if fill_pr_titles:
 246         out += get_pr_titles(prs)
 247
 248     # print list of PR entries before ChangeLog entries
 249     if prs:
 250         if not out:
 251             out += '\n'
 252         for pr in prs:
 253             out += '\t%s\n' % pr
 254         out += '\n'
 255
 256     # sort ChangeLog so that 'testsuite' is at the end
 257     for changelog in sorted(changelog_list, key=lambda x: 'testsuite' in x):
 258         files = changelogs[changelog]
 259         out += '%s:\n' % os.path.join(changelog, 'ChangeLog')
 260         out += '\n'
 261         # new and deleted files should be at the end
 262         for file in sorted(files, key=sort_changelog_files):
 263             assert file.path.startswith(changelog)
 264             in_tests = 'testsuite' in changelog or 'testsuite' in file.path
 265             relative_path = get_rel_path_if_prefixed(file.path, changelog)
 266             functions = []
 267             if file.is_added_file:
 268                 msg = 'New test.' if in_tests else 'New file.'
 269                 out = append_changelog_line(out, relative_path, msg)
 270             elif file.is_removed_file:
 271                 out = append_changelog_line(out, relative_path, 'Removed.')
 272             elif hasattr(file, 'is_rename') and file.is_rename:
 273                 # A file can be theoretically moved to a location that
 274                 # belongs to a different ChangeLog.  Let user fix it.
 275                 #
 276                 # Since unidiff 0.7.0, path.file == path.target_file[2:],
 277                 # it used to be path.source_file[2:]
 278                 relative_path = get_rel_path_if_prefixed(file.source_file[2:],
 279                                                          changelog)
 280                 out = append_changelog_line(out, relative_path, 'Moved to...')
 281                 new_path = get_rel_path_if_prefixed(file.target_file[2:],
 282                                                     changelog)
 283                 out += f'\t* {new_path}: ...here.\n'
 284             elif os.path.basename(file.path) in generated_files:
 285                 out += '\t* %s: Regenerate.\n' % (relative_path)
 286                 append_changelog_line(out, relative_path, 'Regenerate.')
 287             else:
 288                 if not no_functions:
 289                     for hunk in file:
 290                         # Do not add function names for testsuite files
 291                         extension = os.path.splitext(relative_path)[1]
 292                         if not in_tests and extension in function_extensions:
 293                             last_fn = None
 294                             modified_visited = False
 295                             success = False
 296                             for line in hunk:
 297                                 m = identifier_regex.match(line.value)
 298                                 if line.is_added or line.is_removed:
 299                                     # special-case definition in .md files
 300                                     m2 = md_def_regex.match(line.value)
 301                                     if extension == '.md' and m2:
 302                                         fn = m2.group(1)
 303                                         if fn not in functions:
 304                                             functions.append(fn)
 305                                             last_fn = None
 306                                             success = True
 307
 308                                     if not line.value.strip():
 309                                         continue
 310                                     modified_visited = True
 311                                     if m and try_add_function(functions,
 312                                                               m.group(1)):
 313                                         last_fn = None
 314                                         success = True
 315                                 elif line.is_context:
 316                                     if last_fn and modified_visited:
 317                                         try_add_function(functions, last_fn)
 318                                         last_fn = None
 319                                         modified_visited = False
 320                                         success = True
 321                                     elif m:
 322                                         last_fn = m.group(1)
 323                                         modified_visited = False
 324                             if not success:
 325                                 try_add_function(functions,
 326                                                  hunk.section_header)
 327                 if functions:
 328                     out += '\t* %s (%s):\n' % (relative_path, functions[0])
 329                     for fn in functions[1:]:
 330                         out += '\t(%s):\n' % fn
 331                 else:
 332                     out += '\t* %s:\n' % relative_path
 333         out += '\n'
 334     return out
 335
 336
 337 def update_copyright(data):
 338     current_timestamp = datetime.datetime.now().strftime('%Y-%m-%d')
 339     username = subprocess.check_output('git config user.name', shell=True,
 340                                        encoding='utf8').strip()
 341     email = subprocess.check_output('git config user.email', shell=True,
 342                                     encoding='utf8').strip()
 343
 344     changelogs = set()
 345     diff = PatchSet(data)
 346
 347     for file in diff:
 348         changelog = os.path.join(find_changelog(file.path), 'ChangeLog')
 349         if changelog not in changelogs:
 350             changelogs.add(changelog)
 351             with open(changelog) as f:
 352                 content = f.read()
 353             with open(changelog, 'w+') as f:
 354                 f.write(f'{current_timestamp}  {username}  <{email}>\n\n')
 355                 f.write('\tUpdate copyright years.\n\n')
 356                 f.write(content)
 357
 358
 359 def skip_line_in_changelog(line):
 360     return FIRST_LINE_OF_END_RE.match(line) == None
 361
 362 if __name__ == '__main__':
 363     extra_args = os.getenv('GCC_MKLOG_ARGS')
 364     if extra_args:
 365         sys.argv += json.loads(extra_args)
 366
 367     parser = argparse.ArgumentParser(description=help_message)
 368     parser.add_argument('input', nargs='?',
 369                         help='Patch file (or missing, read standard input)')
 370     parser.add_argument('-b', '--pr-numbers', action='store',
 371                         type=lambda arg: arg.split(','), nargs='?',
 372                         help='Add the specified PRs (comma separated)')
 373     parser.add_argument('-s', '--no-functions', action='store_true',
 374                         help='Do not generate function names in ChangeLogs')
 375     parser.add_argument('-p', '--fill-up-bug-titles', action='store_true',
 376                         help='Download title of mentioned PRs')
 377     parser.add_argument('-d', '--directory',
 378                         help='Root directory where to search for ChangeLog '
 379                         'files')
 380     parser.add_argument('-c', '--changelog',
 381                         help='Append the ChangeLog to a git commit message '
 382                              'file')
 383     parser.add_argument('--update-copyright', action='store_true',
 384                         help='Update copyright in ChangeLog files')
 385     parser.add_argument('-a', '--append', action='store_true',
 386                         help='Append the generate ChangeLog to the patch file')
 387     args = parser.parse_args()
 388     if args.input == '-':
 389         args.input = None
 390     if args.directory:
 391         root = args.directory
 392
 393     data = open(args.input, newline='\n') if args.input else sys.stdin
 394     if args.update_copyright:
 395         update_copyright(data)
 396     else:
 397         output = generate_changelog(data, args.no_functions,
 398                                     args.fill_up_bug_titles, args.pr_numbers)
 399         if args.append:
 400             if (not args.input):
 401                 raise Exception("`-a or --append` option not support standard "
 402                                 "input")
 403             lines = []
 404             with open(args.input, 'r', newline='\n') as f:
 405                 # 1 -> not find the possible start of diff log
 406                 # 2 -> find the possible start of diff log
 407                 # 3 -> finish add ChangeLog to the patch file
 408                 maybe_diff_log = 1
 409                 for line in f:
 410                     if maybe_diff_log == 1 and line == "---\n":
 411                         maybe_diff_log = 2
 412                     elif (maybe_diff_log == 2 and
 413                           re.match(r"\s[^\s]+\s+\|\s+\d+\s[+\-]+\n", line)):
 414                         lines += [output, "---\n", line]
 415                         maybe_diff_log = 3
 416                     else:
 417                         # the possible start is not the true start.
 418                         if maybe_diff_log == 2:
 419                             lines.append("---\n")
 420                             maybe_diff_log = 1
 421                         lines.append(line)
 422             with open(args.input, "w") as f:
 423                 f.writelines(lines)
 424         elif args.changelog:
 425             lines = open(args.changelog).read().split('\n')
 426             start = list(takewhile(skip_line_in_changelog, lines))
 427             end = lines[len(start):]
 428             with open(args.changelog, 'w') as f:
 429                 if not start or not start[0]:
 430                     if len(prs) == 1:
 431                         # initial commit subject line 'component: [PRnnnnn]'
 432                         m = prnum_regex.match(prs[0])
 433                         if m:
 434                             title = f'{m.group("comp")}: [PR{m.group("num")}]'
 435                             start.insert(0, title)
 436                 if start:
 437                     # append empty line
 438                     if start[-1] != '':
 439                         start.append('')
 440                 else:
 441                     # append 2 empty lines
 442                     start = 2 * ['']
 443                 f.write('\n'.join(start))
 444                 f.write('\n')
 445                 f.write(output)
 446                 f.write('\n'.join(end))
 447         else:
 448             print(output, end='')