3 # Copyright (C) 2020-2023 Free Software Foundation, Inc.
5 # This file is part of GCC.
7 # GCC is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 3, or (at your option)
12 # GCC is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with GCC; see the file COPYING. If not, write to
19 # the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 # Boston, MA 02110-1301, USA.
22 # This script parses a .diff file generated with 'diff -up' or 'diff -cp'
23 # and adds a skeleton ChangeLog file to the file. It does not try to be
24 # too smart when parsing function names, but it produces a reasonable
27 # Author: Martin Liska <mliska@suse.cz>
36 from itertools
import takewhile
40 from unidiff
import PatchSet
46 # +--------------------------------------------------+
47 # | gccrs: Some title |
48 # | | This is the "start"
49 # | This is some text explaining the commit. |
50 # | There can be several lines. |
51 # | |<------------------->
52 # | Signed-off-by: My Name <my@mail.com> | This is the "end"
53 # +--------------------------------------------------+
56 # +--------------------------------------------------+
57 # | gccrs: Some title |
59 # | This is some text explaining the commit. | This is the "start"
60 # | There can be several lines. |
61 # | |<------------------->
62 # | gcc/rust/ChangeLog: |
63 # | | This is the generated
64 # | * some_file (bla): | ChangeLog part
66 # | |<------------------->
67 # | Signed-off-by: My Name <my@mail.com> | This is the "end"
68 # +--------------------------------------------------+
# Matches the first line of the "end" part of the initial commit message:
# a Signed-off-by:/Co-authored-by: trailer or a '#' line (case-insensitive).
FIRST_LINE_OF_END_RE = re.compile(
    '(?i)^(signed-off-by:|co-authored-by:|#)'
)

# "PR component/number" preceded by a comment leader ('//', '/*', or one of
# 'C', 'c', '*', '!') and whitespace.
pr_regex = re.compile(
    r'(\/(\/|\*)|[Cc*!])\s+(?P<pr>PR [a-z+-]+\/[0-9]+)'
)
# Splits "PR component/number" into its 'comp' and 'num' groups.
prnum_regex = re.compile(
    r'PR (?P<comp>[a-z+-]+)/(?P<num>[0-9]+)'
)
# "DR number" preceded by the same comment leaders as pr_regex.
dr_regex = re.compile(
    r'(\/(\/|\*)|[Cc*!])\s+(?P<dr>DR [0-9]+)'
)
# A '{ dg-error' or '{ dg-warning' directive.
dg_regex = re.compile(
    r'{\s+dg-(error|warning)'
)
# "pr"/"PR" followed by at least four digits, at the start of the text or
# after a non-word character or underscore.
pr_filename_regex = re.compile(
    r'(^|[\W_])[Pp][Rr](?P<pr>\d{4,})'
)

# Line starting with a letter, digit, underscore or '#'.
identifier_regex = re.compile(
    r'^([a-zA-Z0-9_#].*)'
)
# Line starting with '/*'.
comment_regex = re.compile(
    r'^\/\*'
)
# class/struct/union/enum declaration with an optional GTY((...)) marker;
# group 1 is the keyword, group 3 the declared name.
struct_regex = re.compile(
    r'^(class|struct|union|enum)\s+'
    r'(GTY\(.*\)\s+)?([a-zA-Z0-9_]+)'
)
# '#define NAME' / '#undef NAME'; group 2 is NAME.
macro_regex = re.compile(
    r'#\s*(define|undef)\s+([a-zA-Z0-9_]+)'
)
# 'DEFXXX (name' at the start of a line; group 1 is the first argument.
super_macro_regex = re.compile(
    r'^DEF[A-Z0-9_]+\s*\(([a-zA-Z0-9_]+)'
)
# An identifier-like token followed by '(' and a character other than '*'.
fn_regex = re.compile(
    r'([a-zA-Z_][^()\s]*)\s*\([^*]'
)
# An innermost '<...>' group (no nested angle brackets inside).
template_and_param_regex = re.compile(
    r'<[^<>]*>'
)
# '(define... "name"'; group 1 is the quoted name.
md_def_regex = re.compile(
    r'\(define.*\s+"(.*)"'
)

# Bugzilla REST query template; '%s' is replaced by the bug id.
bugzilla_url = (
    'https://gcc.gnu.org/bugzilla/rest.cgi/bug?id=%s&'
    'include_fields=summary,component'
)

# File extensions for which function names are extracted.
function_extensions = {
    '.c', '.cpp', '.C', '.cc', '.h', '.inc', '.def', '.md',
}

# NB: Makefile.in isn't listed as it's not always generated.
generated_files = {
    'aclocal.m4', 'config.h.in', 'configure',
}
96 Generate ChangeLog template for PATCH.
97 PATCH must be generated using diff(1)'s -up or -cp options
98 (or their equivalent in git).
# Resolved (symlink-free) path of this script.  Despite its name, this is
# the path of the file itself, not of its containing directory.
script_folder = os.path.realpath(__file__)

# Two directory levels above the script; the base directory that ChangeLog
# lookups are joined against.
root = os.path.dirname(
    os.path.dirname(script_folder)
)
105 def find_changelog(path
):
106 folder
= os
.path
.split(path
)[0]
108 if os
.path
.exists(os
.path
.join(root
, folder
, 'ChangeLog')):
110 folder
= os
.path
.dirname(folder
)
113 raise AssertionError()
116 def extract_function_name(line
):
117 if comment_regex
.match(line
):
119 m
= struct_regex
.search(line
)
122 return m
.group(1) + ' ' + m
.group(3)
123 m
= macro_regex
.search(line
)
127 m
= super_macro_regex
.search(line
)
131 m
= fn_regex
.search(line
)
133 # Discard template and function parameters.
135 fn
= re
.sub(template_and_param_regex
, '', fn
)
140 def try_add_function(functions
, line
):
141 fn
= extract_function_name(line
)
142 if fn
and fn
not in functions
:
def sort_changelog_files(changed_file):
    """Return the sort key for CHANGED_FILE.

    The key is the pair (is_added_file, is_removed_file), so that when it
    is used with sorted() files that are neither added nor removed come
    first.
    """
    added = changed_file.is_added_file
    removed = changed_file.is_removed_file
    return added, removed
151 def get_pr_titles(prs
):
153 for idx
, pr
in enumerate(prs
):
154 pr_id
= pr
.split('/')[-1]
155 r
= requests
.get(bugzilla_url
% pr_id
)
156 bugs
= r
.json()['bugs']
158 prs
[idx
] = 'PR %s/%s' % (bugs
[0]['component'], pr_id
)
159 out
= '%s - %s\n' % (prs
[idx
], bugs
[0]['summary'])
160 if out
not in output
:
164 return '\n'.join(output
)
167 def append_changelog_line(out
, relative_path
, text
):
168 line
= f
'\t* {relative_path}:'
169 if len(line
.replace('\t', ' ' * TAB_WIDTH
) + ' ' + text
) <= LINE_LIMIT
:
170 out
+= f
'{line} {text}\n'
177 def get_rel_path_if_prefixed(path
, folder
):
178 if path
.startswith(folder
):
179 return path
[len(folder
):].lstrip('/')
184 def generate_changelog(data
, no_functions
=False, fill_pr_titles
=False,
185 additional_prs
=None):
192 diff
= PatchSet(data
)
195 for apr
in additional_prs
:
196 if not apr
.startswith('PR ') and '/' in apr
:
201 # skip files that can't be parsed
202 if file.path
== '/dev/null':
204 changelog
= find_changelog(file.path
)
205 if changelog
not in changelogs
:
206 changelogs
[changelog
] = []
207 changelog_list
.append(changelog
)
208 changelogs
[changelog
].append(file)
210 # Extract PR entries from newly added tests
211 if 'testsuite' in file.path
and file.is_added_file
:
212 # Only search the first ten lines, as later lines may
213 # contain commented-out code with a note that it
214 # has not been tested due to a certain PR or DR.
218 for line
in hunks
[0][0:10]:
219 m
= pr_regex
.search(line
.value
)
224 this_file_prs
.append(pr
.split('/')[-1])
226 m
= dr_regex
.search(line
.value
)
231 this_file_prs
.append(dr
.split('/')[-1])
232 elif dg_regex
.search(line
.value
):
233 # Found dg-warning/dg-error line
236 # PR number in the file name
237 fname
= os
.path
.basename(file.path
)
238 m
= pr_filename_regex
.search(fname
)
242 if pr
not in this_file_prs
and pr2
not in prs
:
246 out
+= get_pr_titles(prs
)
248 # print list of PR entries before ChangeLog entries
256 # sort ChangeLog so that 'testsuite' is at the end
257 for changelog
in sorted(changelog_list
, key
=lambda x
: 'testsuite' in x
):
258 files
= changelogs
[changelog
]
259 out
+= '%s:\n' % os
.path
.join(changelog
, 'ChangeLog')
261 # new and deleted files should be at the end
262 for file in sorted(files
, key
=sort_changelog_files
):
263 assert file.path
.startswith(changelog
)
264 in_tests
= 'testsuite' in changelog
or 'testsuite' in file.path
265 relative_path
= get_rel_path_if_prefixed(file.path
, changelog
)
267 if file.is_added_file
:
268 msg
= 'New test.' if in_tests
else 'New file.'
269 out
= append_changelog_line(out
, relative_path
, msg
)
270 elif file.is_removed_file
:
271 out
= append_changelog_line(out
, relative_path
, 'Removed.')
272 elif hasattr(file, 'is_rename') and file.is_rename
:
273 # A file can be theoretically moved to a location that
274 # belongs to a different ChangeLog. Let user fix it.
276 # Since unidiff 0.7.0, path.file == path.target_file[2:],
277 # it used to be path.source_file[2:]
278 relative_path
= get_rel_path_if_prefixed(file.source_file
[2:],
280 out
= append_changelog_line(out
, relative_path
, 'Moved to...')
281 new_path
= get_rel_path_if_prefixed(file.target_file
[2:],
283 out
+= f
'\t* {new_path}: ...here.\n'
284 elif os
.path
.basename(file.path
) in generated_files
:
285 out
+= '\t* %s: Regenerate.\n' % (relative_path
)
286 append_changelog_line(out
, relative_path
, 'Regenerate.')
290 # Do not add function names for testsuite files
291 extension
= os
.path
.splitext(relative_path
)[1]
292 if not in_tests
and extension
in function_extensions
:
294 modified_visited
= False
297 m
= identifier_regex
.match(line
.value
)
298 if line
.is_added
or line
.is_removed
:
299 # special-case definition in .md files
300 m2
= md_def_regex
.match(line
.value
)
301 if extension
== '.md' and m2
:
303 if fn
not in functions
:
308 if not line
.value
.strip():
310 modified_visited
= True
311 if m
and try_add_function(functions
,
315 elif line
.is_context
:
316 if last_fn
and modified_visited
:
317 try_add_function(functions
, last_fn
)
319 modified_visited
= False
323 modified_visited
= False
325 try_add_function(functions
,
328 out
+= '\t* %s (%s):\n' % (relative_path
, functions
[0])
329 for fn
in functions
[1:]:
330 out
+= '\t(%s):\n' % fn
332 out
+= '\t* %s:\n' % relative_path
337 def update_copyright(data
):
338 current_timestamp
= datetime
.datetime
.now().strftime('%Y-%m-%d')
339 username
= subprocess
.check_output('git config user.name', shell
=True,
340 encoding
='utf8').strip()
341 email
= subprocess
.check_output('git config user.email', shell
=True,
342 encoding
='utf8').strip()
345 diff
= PatchSet(data
)
348 changelog
= os
.path
.join(find_changelog(file.path
), 'ChangeLog')
349 if changelog
not in changelogs
:
350 changelogs
.add(changelog
)
351 with
open(changelog
) as f
:
353 with
open(changelog
, 'w+') as f
:
354 f
.write(f
'{current_timestamp} {username} <{email}>\n\n')
355 f
.write('\tUpdate copyright years.\n\n')
def skip_line_in_changelog(line):
    """Return True if LINE does not start the "end" of a commit message.

    LINE is tested against FIRST_LINE_OF_END_RE; the result is True when
    the regex does not match at the start of LINE and False when it does.
    """
    # Compare against the None singleton by identity, not equality.
    return FIRST_LINE_OF_END_RE.match(line) is None
362 if __name__
== '__main__':
363 extra_args
= os
.getenv('GCC_MKLOG_ARGS')
365 sys
.argv
+= json
.loads(extra_args
)
367 parser
= argparse
.ArgumentParser(description
=help_message
)
368 parser
.add_argument('input', nargs
='?',
369 help='Patch file (or missing, read standard input)')
370 parser
.add_argument('-b', '--pr-numbers', action
='store',
371 type=lambda arg
: arg
.split(','), nargs
='?',
372 help='Add the specified PRs (comma separated)')
373 parser
.add_argument('-s', '--no-functions', action
='store_true',
374 help='Do not generate function names in ChangeLogs')
375 parser
.add_argument('-p', '--fill-up-bug-titles', action
='store_true',
376 help='Download title of mentioned PRs')
377 parser
.add_argument('-d', '--directory',
378 help='Root directory where to search for ChangeLog '
380 parser
.add_argument('-c', '--changelog',
381 help='Append the ChangeLog to a git commit message '
383 parser
.add_argument('--update-copyright', action
='store_true',
384 help='Update copyright in ChangeLog files')
385 parser
.add_argument('-a', '--append', action
='store_true',
386 help='Append the generate ChangeLog to the patch file')
387 args
= parser
.parse_args()
388 if args
.input == '-':
391 root
= args
.directory
393 data
= open(args
.input, newline
='\n') if args
.input else sys
.stdin
394 if args
.update_copyright
:
395 update_copyright(data
)
397 output
= generate_changelog(data
, args
.no_functions
,
398 args
.fill_up_bug_titles
, args
.pr_numbers
)
401 raise Exception("`-a or --append` option not support standard "
404 with
open(args
.input, 'r', newline
='\n') as f
:
405 # 1 -> not find the possible start of diff log
406 # 2 -> find the possible start of diff log
407 # 3 -> finish add ChangeLog to the patch file
410 if maybe_diff_log
== 1 and line
== "---\n":
412 elif (maybe_diff_log
== 2 and
413 re
.match(r
"\s[^\s]+\s+\|\s+\d+\s[+\-]+\n", line
)):
414 lines
+= [output
, "---\n", line
]
417 # the possible start is not the true start.
418 if maybe_diff_log
== 2:
419 lines
.append("---\n")
422 with
open(args
.input, "w") as f
:
425 lines
= open(args
.changelog
).read().split('\n')
426 start
= list(takewhile(skip_line_in_changelog
, lines
))
427 end
= lines
[len(start
):]
428 with
open(args
.changelog
, 'w') as f
:
429 if not start
or not start
[0]:
431 # initial commit subject line 'component: [PRnnnnn]'
432 m
= prnum_regex
.match(prs
[0])
434 title
= f
'{m.group("comp")}: [PR{m.group("num")}]'
435 start
.insert(0, title
)
441 # append 2 empty lines
443 f
.write('\n'.join(start
))
446 f
.write('\n'.join(end
))
448 print(output
, end
='')