# Check gcc.pot file for stylistic issues as described in
# https://gcc.gnu.org/onlinedocs/gccint/Guidelines-for-Diagnostics.html,
# especially in gcc-internal-format messages.
#
# This file is part of GCC.
#
# GCC is free software; you can redistribute it and/or modify it under
# the terms of the GNU General Public License as published by the Free
# Software Foundation; either version 3, or (at your option) any later
# version.
#
# GCC is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or
# FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
# for more details.
#
# You should have received a copy of the GNU General Public License
# along with GCC; see the file COPYING3.  If not see
# <http://www.gnu.org/licenses/>.
25 from collections
import Counter
26 from typing
import Dict
, Match
# Counts how often warn() has reported each diagnostic text.
seen_warnings = Counter()
def location(msg: polib.POEntry):
    """
    Returns the first source occurrence of the message as 'file:line',
    or '<unknown location>' if the message lists no occurrences.
    """
    occurrences = msg.occurrences
    if not occurrences:
        return '<unknown location>'

    first = occurrences[0]
    return f'{first[0]}:{first[1]}'
def warn(msg: polib.POEntry,
         diagnostic_id: str, diagnostic: str, include_msgid=True):
    """
    Prints one diagnostic for the message, prefixed with its location,
    and counts it in seen_warnings under the diagnostic text.

    When include_msgid is true, the repr of the msgid is appended to
    the printed line.

    To suppress a warning for a particular message,
    add a line "#, gcclint:ignore:{diagnostic_id}" to the message.
    """
    suppression_flag = f'gcclint:ignore:{diagnostic_id}'
    if suppression_flag in msg.flags:
        # Suppressed warnings are neither printed nor counted.
        return

    seen_warnings[diagnostic] += 1

    line = f'{location(msg)}: {diagnostic}'
    if include_msgid:
        line += f' in {repr(msg.msgid)}'
    print(line)
def lint_gcc_internal_format(msg: polib.POEntry):
    """
    Checks a single message that has the gcc-internal-format. These
    messages use a variety of placeholders such as %qs and %<quotes%>.

    Each check is a nested function that inspects the msgid (and, for
    translated messages, the msgstr) and reports its findings through
    warn(), so each one can be suppressed per message with the
    corresponding "gcclint:ignore:{diagnostic_id}" flag.
    """

    msgid: str = msg.msgid

    def outside_quotes(m: Match[str]):
        """
        Tells whether the text before the start of the match contains
        as many %< as %>, that is, whether the match starts outside of
        %<quotes%>.
        """
        before = msgid[:m.start(0)]
        return before.count('%<') == before.count('%>')

    def lint_matching_placeholders():
        """
        Warns when literal values in placeholders are not exactly equal
        in the translation. This can happen when doing copy-and-paste
        translations of similar messages.

        To avoid these mismatches in the first place,
        structurally equal messages are found by
        lint_diagnostics_differing_only_in_placeholders.

        This check only applies when checking a finished translation
        such as de.po, not gcc.pot.
        """

        if not msg.translated():
            return

        in_msgid = re.findall('%<[^%]+%>', msgid)
        in_msgstr = re.findall('%<[^%]+%>', msg.msgstr)

        # Compared as sets: order and repetition of the quoted parts
        # may legitimately differ between the languages.
        if set(in_msgid) != set(in_msgstr):
            warn(msg,
                 'placeholder-mismatch',
                 f'placeholder mismatch: msgid has {in_msgid}, '
                 f'msgstr has {in_msgstr}',
                 include_msgid=False)

    def lint_option_outside_quotes():
        """
        Warns about whitespace-separated words outside of %<quotes%>
        that look like a command line option (a dash followed by a
        letter) or that start with __builtin_.
        """
        for match in re.finditer(r'\S+', msgid):
            part = match.group()
            if not outside_quotes(match):
                continue

            if part.startswith('-'):
                if len(part) >= 2 and part[1].isalpha():
                    # NOTE(review): exemption for the literal word -INF
                    # (presumably the floating-point value rather than
                    # an option) -- confirm against the repository.
                    if part == '-INF':
                        continue

                    warn(msg,
                         'option-outside-quotes',
                         'command line option outside %<quotes%>')

            if part.startswith('__builtin_'):
                warn(msg,
                     'builtin-outside-quotes',
                     'builtin function outside %<quotes%>')

    def lint_plain_apostrophe():
        """
        Warns about each apostrophe outside of %<quotes%> that is not
        preceded by a percent sign.
        """
        # The lookbehind keeps the match on the apostrophe itself.
        # Consuming the preceding character as part of the match would
        # miss an apostrophe at the very start of the message and the
        # second of two adjacent apostrophes, and it would make
        # outside_quotes look one character too early, misjudging an
        # apostrophe that directly follows %< or %>.
        for match in re.finditer(r"(?<!%)'", msgid):
            if outside_quotes(match):
                warn(msg, 'apostrophe', 'apostrophe without leading %')

    def lint_space_before_quote():
        """
        A missing space before %< is often the result of string
        literals that are joined by the C compiler and neither literal
        has a space to separate the words.

        A %< that directly follows the placeholder %s is not reported.
        """

        for match in re.finditer('(.?[a-zA-Z0-9])%<', msgid):
            if match.group(1) != '%s':
                warn(msg,
                     'no-space-before-quote',
                     '%< directly following a letter or digit')

    def lint_underscore_outside_quotes():
        """
        An underscore outside of quotes is used in several contexts,
        and many of them violate the GCC Guidelines for Diagnostics:

        * names of GCC-internal compiler functions
        * names of GCC-internal data structures
        * static_cast and the like (which are legitimate)
        """

        for match in re.finditer('_', msgid):
            if outside_quotes(match):
                warn(msg,
                     'underscore-outside-quotes',
                     'underscore outside of %<quotes%>')
                # One report per message is enough.
                return

    def lint_may_not():
        """
        The term "may not" may either mean "it could be the case"
        or "should not". These two different meanings are sometimes
        hard to tell apart.
        """

        if re.search(r'\bmay not\b', msgid):
            # NOTE(review): diagnostic id as recalled from the
            # repository version -- confirm.
            warn(msg,
                 'ambiguous-may-not',
                 'the term "may not" is ambiguous')

    def lint_unbalanced_quotes():
        """
        Warns when the number of %< differs from the number of %>,
        first in the msgid and then, for translated messages, in the
        msgstr.
        """
        # NOTE(review): diagnostic id as recalled from the repository
        # version -- confirm.
        if msgid.count('%<') != msgid.count('%>'):
            warn(msg,
                 'unbalanced-quotes',
                 'unbalanced %< and %> quotes')

        if msg.translated():
            if msg.msgstr.count('%<') != msg.msgstr.count('%>'):
                warn(msg,
                     'unbalanced-quotes',
                     'unbalanced %< and %> quotes')

    def lint_single_space_after_sentence():
        """
        After a sentence there should be two spaces.
        """

        if re.search(r'[.] [A-Z]', msgid):
            warn(msg,
                 'single-space-after-sentence',
                 'single space after sentence')

    def lint_non_canonical_quotes():
        """
        Catches %<%s%>, which can be written in the shorter form %qs.
        The same applies to %s wrapped in plain quote characters.
        """
        match = re.search("%<%s%>|'%s'|\"%s\"|`%s'", msgid)
        if match:
            warn(msg,
                 'non-canonical-quotes',
                 f'placeholder {match.group()} should be written as %qs')

    lint_option_outside_quotes()
    lint_plain_apostrophe()
    lint_space_before_quote()
    lint_underscore_outside_quotes()
    lint_may_not()
    lint_unbalanced_quotes()
    lint_matching_placeholders()
    lint_single_space_after_sentence()
    lint_non_canonical_quotes()
def lint_diagnostics_differing_only_in_placeholders(po: polib.POFile):
    """
    Detects messages that are structurally the same, except that they
    use different plain strings inside %<quotes%>. These messages can
    be merged in order to prevent copy-and-paste mistakes.

    Every %<...%> part of a msgid is replaced with %qs; a message whose
    normalized form has been recorded before is reported together with
    the location of the message that recorded it.
    """

    first_by_pattern: Dict[str, polib.POEntry] = {}

    for entry in po:
        text = entry.msgid
        pattern = re.sub('%<[^%]+%>', '%qs', text)

        if pattern in first_by_pattern:
            earlier = first_by_pattern[pattern]
            # NOTE(review): diagnostic id as recalled from the
            # repository version -- confirm.
            warn(entry,
                 'same-pattern',
                 f'same pattern for {text!r} and '
                 f'{earlier.msgid!r} in {location(earlier)}',
                 include_msgid=False)
        else:
            # Record the entry under its normalized and its literal
            # form, so that later messages can match either of them.
            first_by_pattern[pattern] = entry
            first_by_pattern[text] = entry
def lint_file(po: polib.POFile):
    """
    Runs the per-message checks on every entry that is neither obsolete
    nor fuzzy and carries the gcc-internal-format flag, then runs the
    check that compares the messages of the whole file with each other.
    """
    for entry in po:
        if entry.obsolete or entry.fuzzy:
            continue
        if 'gcc-internal-format' in entry.flags:
            lint_gcc_internal_format(entry)

    lint_diagnostics_differing_only_in_placeholders(po)
def main():
    """
    Parses the command line, lints the given file and prints how often
    each diagnostic was reported.
    """
    arg_parser = argparse.ArgumentParser(description='')
    arg_parser.add_argument('file', help='pot file')
    options = arg_parser.parse_args()

    lint_file(polib.pofile(options.file))

    # NOTE(review): the heading text and the "more than once" filter
    # are as recalled from the repository version -- confirm.
    print()
    print('summary:')
    for diagnostic, count in seen_warnings.most_common():
        if count > 1:
            print(f'{count}\t{diagnostic}')


if __name__ == '__main__':
    main()