# gtk-doc - GTK DocBook documentation generator.
# Copyright (C) 2001  Damon Chaplin
#               2007-2016  Stefan Sauer
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
# Support both Python 2 and 3
from __future__ import print_function

import sys
from collections import OrderedDict
def open_text(filename, mode="r", encoding="utf-8"):
    """Open a text file the same way on Python 2 and Python 3.

    Unlike the builtin open(), utf-8 is used by default and not the locale
    encoding (which is ANSI on Windows for example, not very helpful).

    For Python 2, files are opened in text mode like with Python 3.

    Args:
      filename (str): path of the file to open
      mode (str): 'r' for reading or 'w' for writing
      encoding (str): encoding used to decode/encode the file content

    Returns:
      file: the opened text-mode file object

    Raises:
      ValueError: if mode is neither 'r' nor 'w'
    """
    if mode not in ("r", "w"):
        raise ValueError("mode %r not supported, must be 'r' or 'w'" % mode)

    if sys.version_info[0] >= 3:
        return open(filename, mode, encoding=encoding)

    # We can't use io.open() here as its write method is too strict and
    # only allows unicode instances and not everything in the codebase
    # forces unicode at the moment. codecs.open() on the other hand
    # happily takes ASCII str and decodes it.
    return codecs.open(filename, mode, encoding=encoding)
61 """Check GTKDOC_TRACE environment variable.
63 Set python log level to the value of the environment variable (DEBUG, INFO,
64 WARNING, ERROR and CRITICAL) or INFO if the environment variable is empty.
66 log_level
= os
.environ
.get('GTKDOC_TRACE')
70 logging
.basicConfig(stream
=sys
.stdout
,
71 level
=logging
.getLevelName(log_level
.upper()),
72 format
='%(asctime)s:%(filename)s:%(funcName)s:%(lineno)d:%(levelname)s:%(message)s')
73 # When redirecting the output on python2 we get UnicodeEncodeError:
74 if not sys
.stdout
.encoding
:
76 sys
.stdout
= codecs
.getwriter('utf8')(sys
.stdout
)
def UpdateFileIfChanged(old_file, new_file, make_backup):
    """Replace old_file with new_file, but only if the content differs.

    Compares the old version of the file with the new version and if the
    file has changed it moves the new version into the old version's place.
    This is used so we only change files if needed, so we can do proper
    dependency tracking.

    Args:
      old_file (str): The pathname of the old file.
      new_file (str): The pathname of the new version of the file.
      make_backup (bool): True if a backup of the old file should be kept.
                          It will have the .bak suffix added to the file name.

    Returns:
      bool: It returns False if the file hasn't changed, and True if it has.
    """
    logging.debug("Comparing %s with %s...", old_file, new_file)

    if os.path.exists(old_file):
        # Read through context managers so both handles are closed before
        # the files are unlinked or renamed below.
        with open(old_file, 'rb') as f:
            old_contents = f.read()
        with open(new_file, 'rb') as f:
            new_contents = f.read()

        if old_contents == new_contents:
            # Nothing to update: drop the candidate, keep the old file as is.
            os.unlink(new_file)
            logging.debug("-> content is the same.")
            return False

        if make_backup:
            backupname = old_file + '.bak'
            if os.path.exists(backupname):
                os.unlink(backupname)
            os.rename(old_file, backupname)
        else:
            os.unlink(old_file)
        logging.debug("-> content differs.")
    else:
        logging.debug("-> %s created.", old_file)

    os.rename(new_file, old_file)
    return True
def GetModuleDocDir(module_name):
    """Get the docdir for the given module via pkg-config

    Args:
      module_name (string): The module, e.g. 'glib-2.0'

    Returns:
      str: the doc directory or None
    """
    try:
        prefix = subprocess.check_output(
            [config.pkg_config, '--variable=prefix', module_name],
            universal_newlines=True)
    except subprocess.CalledProcessError:
        # pkg-config exited with a non-zero status for this module
        return None
    # The output ends with a newline that must not become part of the path
    return os.path.join(prefix.strip(), 'share/gtk-doc/html')
def LogWarning(filename, line, message):
    """Log a warning in gcc style format

    Args:
      filename (str): The file the error comes from; a false value (None or
                      '') is reported as "unknown"
      line (int): line number in the file
      message (str): the error message to print
    """
    filename = filename or "unknown"

    # TODO: write to stderr
    print("%s:%d: warning: %s" % (filename, line, message))
def CreateValidSGMLID(xml_id):
    """Creates a valid SGML 'id' from the given string.

    According to http://www.w3.org/TR/html4/types.html#type-id "ID and NAME
    tokens must begin with a letter ([A-Za-z]) and may be followed by any number
    of letters, digits ([0-9]), hyphens ("-"), underscores ("_"), colons (":"),
    and periods (".")."

    When creating SGML IDS, we append ":CAPS" to all all-caps identifiers to
    prevent name clashes (SGML ids are case-insensitive). (It basically never is
    the case that mixed-case identifiers would collide.)

    Args:
      xml_id (str): The text to be converted into a valid SGML id.

    Returns:
      str: The converted id.
    """
    # Special case, '_' would end up as '' so we use 'gettext-macro' instead.
    if xml_id == '_':
        return "gettext-macro"

    xml_id = re.sub(r'[,;]', '', xml_id)
    xml_id = re.sub(r'[_ ]', '-', xml_id)
    xml_id = re.sub(r'^-+', '', xml_id)
    # '::' has to be handled before ':' so it does not end up as '----'
    xml_id = xml_id.replace('::', '-')
    xml_id = xml_id.replace(':', '--')

    # Append ":CAPS" to all all-caps identifiers
    # FIXME: there are some inconsistencies here, we have index files containing e.g. TRUE--CAPS
    if xml_id.isupper() and not xml_id.endswith('-CAPS'):
        xml_id += ':CAPS'

    return xml_id
# Parsing helpers (move to mkdb ?)
class ParseError(Exception):
    """Raised by the parsing helpers when a declaration has an unexpected form."""
def PreprocessStructOrEnum(declaration):
    """Trim a type declaration for display.

    Removes private sections and comments from the declaration.

    Args:
      declaration (str): the type declaration (struct or enum)

    Returns:
      str: the trimmed declaration
    """
    multiline = re.MULTILINE | re.DOTALL

    # Remove private symbols
    # Assume end of declaration if line begins with '}'
    declaration = re.sub(
        r'\n?[ \t]*/\*\s*<\s*(private|protected)\s*>\s*\*/.*?(?:/\*\s*<\s*public\s*>\s*\*/|(?=^\}))',
        '', declaration, flags=multiline)

    # Remove all other comments
    # Block comments that occupy whole lines collapse to a single newline ...
    declaration = re.sub(r'\n\s*/\*.*?\*/\s*\n', r'\n', declaration, flags=multiline)
    # ... any remaining block comment becomes a single space
    declaration = re.sub(r'/\*([^*]+|\*(?!/))*\*/', r' ', declaration)
    # Same for '//' comments: whole-line ones first, then trailing ones
    declaration = re.sub(r'\n\s*//.*?\n', r'\n', declaration, flags=multiline)
    declaration = re.sub(r'//.*', '', declaration)

    return declaration
219 # TODO: output_function_params is always passed as 0
220 # TODO: we always pass both functions
221 def ParseStructDeclaration(declaration
, is_object
, output_function_params
, typefunc
=None, namefunc
=None):
222 """ Parse a struct declaration.
224 Takes a structure declaration and breaks it into individual type declarations.
227 declaration (str): the declaration to parse
228 is_object (bool): true if this is an object structure
229 output_function_params (bool): true if full type is wanted for function pointer members
230 typefunc (func): function to apply to type
231 namefunc (func): function to apply to name
234 dict: map of (symbol, decl) pairs describing the public declaration
237 # For forward struct declarations just return an empty array.
238 if re
.search(r
'(?:struct|union)\s+\S+\s*;', declaration
, flags
=re
.MULTILINE | re
.DOTALL
):
241 # Remove all private parts of the declaration
242 # For objects, assume private
244 declaration
= re
.sub(r
'''((?:struct|union)\s+\w*\s*\{)
246 (?:/\*\s*<\s*public\s*>\s*\*/|(?=\}))''',
247 r
'\1', declaration
, flags
=re
.MULTILINE | re
.DOTALL | re
.VERBOSE
)
249 # Remove g_iface, parent_instance and parent_class if they are first member
250 declaration
= re
.sub(r
'(\{)\s*(\w)+\s+(g_iface|parent_instance|parent_class)\s*;', r
'\1', declaration
)
252 declaration
= PreprocessStructOrEnum(declaration
)
254 if declaration
.strip() == '':
257 # Prime match after "struct/union {" declaration
258 match
= re
.search(r
'(?:struct|union)\s+\w*\s*\{', declaration
, flags
=re
.MULTILINE | re
.DOTALL
)
260 raise ParseError('Declaration "%s" does not begin with "struct/union [NAME] {"' % declaration
)
262 logging
.debug('public fields in struct/union: %s', declaration
)
264 result
= OrderedDict()
266 # Treat lines in sequence, allowing singly nested anonymous structs and unions.
267 for m
in re
.finditer(r
'\s*([^{;]+(\{[^\}]*\}[^{;]+)?);', declaration
[match
.end():], flags
=re
.MULTILINE | re
.DOTALL
):
270 logging
.debug('checking "%s"', line
)
272 if re
.search(r
'^\s*\}\s*\w*\s*$', line
):
275 # FIXME: Just ignore nested structs and unions for now
279 # ignore preprocessor directives
280 line
= re
.sub(r
'^#.*?\n\s*', '', line
, flags
=re
.MULTILINE | re
.DOTALL
)
282 if re
.search(r
'^\s*\}\s*\w*\s*$', line
):
285 func_match
= re
.search(r
'''^
286 (const\s+|G_CONST_RETURN\s+|unsigned\s+|signed\s+|long\s+|short\s+)*(struct\s+|enum\s+)? # mod1
288 (\**(?:\s*restrict)?)\s* # ptr1
292 \(\s*\*\s*(\w+)\s*\)\s* # name
293 \(([^)]*)\)\s* # func_params
294 $''', line
, flags
=re
.VERBOSE
)
295 vars_match
= re
.search(r
'''^
296 ((?:const\s+|volatile\s+|unsigned\s+|signed\s+|short\s+|long\s+)?)(struct\s+|enum\s+)? # mod1
298 (\** \s* const\s+)? # mod2
300 $''', line
, flags
=re
.VERBOSE
)
302 # Try to match structure members which are functions
304 mod1
= func_match
.group(1) or ''
305 if func_match
.group(2):
306 mod1
+= func_match
.group(2)
307 func_type
= func_match
.group(3)
308 ptr1
= func_match
.group(4)
309 mod2
= func_match
.group(5) or ''
310 ptr2
= func_match
.group(6)
311 mod3
= func_match
.group(7) or ''
312 name
= func_match
.group(8)
313 func_params
= func_match
.group(9)
316 ptype
= typefunc(func_type
, '<type>%s</type>' % func_type
)
319 pname
= namefunc(name
)
321 if output_function_params
:
322 result
[name
] = '%s%s%s%s%s%s (*%s) (%s)' % (
323 mod1
, ptype
, ptr1
, mod2
, ptr2
, mod3
, pname
, func_params
)
325 result
[name
] = '%s ()' % pname
327 # Try to match normal struct fields of comma-separated variables/
329 mod1
= vars_match
.group(1) or ''
330 if vars_match
.group(2):
331 mod1
+= vars_match
.group(2)
332 vtype
= vars_match
.group(3)
335 ptype
= typefunc(vtype
, '<type>%s</type>' % vtype
)
336 mod2
= vars_match
.group(4) or ''
339 var_list
= vars_match
.group(5)
341 logging
.debug('"%s" "%s" "%s" "%s"', mod1
, vtype
, mod2
, var_list
)
343 mod1
= mod1
.replace(' ', ' ')
344 mod2
= mod2
.replace(' ', ' ')
346 for n
in var_list
.split(','):
347 # Each variable can have any number of '*' before the identifier,
348 # and be followed by any number of pairs of brackets or a bit field specifier.
349 # e.g. *foo, ***bar, *baz[12][23], foo : 25.
351 r
'^\s* (\**(?:\s*restrict\b)?) \s* (\w+) \s* (?: ((?:\[[^\]]*\]\s*)+) | (:\s*\d+)?) \s* $',
356 array
= m
.group(3) or ''
362 if ptrs
and not ptrs
.endswith('*'):
365 array
= array
.replace(' ', ' ')
366 bits
= bits
.replace(' ', ' ')
370 pname
= namefunc(name
)
372 result
[name
] = '%s%s%s %s%s%s%s;' % (mod1
, ptype
, mod2
, ptrs
, pname
, array
, bits
)
374 logging
.debug('Matched line: %s%s%s %s%s%s%s', mod1
, ptype
, mod2
, ptrs
, pname
, array
, bits
)
376 logging
.warning('Cannot parse struct field: "%s"', n
)
379 logging
.warning('Cannot parse structure field: "%s"', line
)
def ParseEnumDeclaration(declaration):
    """Parse an enum declaration.

    This function takes a enumeration declaration and breaks it into individual
    enum member declarations.

    Args:
      declaration (str): the declaration to parse

    Returns:
      list: the names of the public enum members, in declaration order

    Raises:
      ParseError: if the declaration contains no "enum [NAME] {" opening
    """
    multiline = re.MULTILINE | re.DOTALL

    # For forward enum declarations just return an empty list.
    if re.search(r'enum\s+\S+\s*;', declaration, flags=multiline):
        return []

    declaration = PreprocessStructOrEnum(declaration)

    if declaration.strip() == '':
        return []

    result = []

    # Remove parenthesized expressions (in macros like GTK_BLAH = BLAH(1,3))
    # to avoid getting confused by commas they might contain. This doesn't
    # handle nested parentheses correctly.
    declaration = re.sub(r'\([^)\n]+\)', '', declaration)

    # Remove apostrophed characters (e.g. '}' or ',') values to avoid getting
    # confused with end of enumeration.
    # See https://bugzilla.gnome.org/show_bug.cgi?id=741305
    declaration = re.sub(r'\'.\'', '', declaration)

    # Remove comma from comma - possible whitespace - closing brace sequence
    # since it is legal in GNU C and C99 to have a trailing comma but doesn't
    # result in an actual enum member
    declaration = re.sub(r',(\s*})', r'\1', declaration)

    # Prime match after "typedef enum {" declaration
    match = re.search(r'(typedef\s+)?enum\s*(\S+\s*)?\{', declaration, flags=multiline)
    if not match:
        raise ParseError('Enum declaration "%s" does not begin with "typedef enum {" or "enum [NAME] {"' % declaration)

    logging.debug("public fields in enum: %s'", declaration)

    # Treat lines in sequence.
    for m in re.finditer(r'\s*([^,\}]+)([,\}])', declaration[match.end():], flags=multiline):
        line = m.group(1)
        terminator = m.group(2)

        # ignore preprocessor directives
        line = re.sub(r'^#.*?\n\s*', '', line, flags=multiline)

        m1 = re.search(r'^(\w+)\s*(=.*)?$', line, flags=multiline)
        # Special case for GIOCondition, where the values are specified by
        # macros which expand to include the equal sign like '=1'.
        m2 = re.search(r'^(\w+)\s*GLIB_SYSDEF_POLL', line, flags=multiline)
        if m1:
            result.append(m1.group(1))
        elif m2:
            result.append(m2.group(1))
        elif line.strip().startswith('#'):
            # Special case include of <gdk/gdkcursors.h>, just ignore it
            # Special case for #ifdef/#else/#endif, just ignore it
            pass
        else:
            logging.warning('Cannot parse enumeration member: %s', line)

        # The closing brace ends the member list; whatever follows (e.g. the
        # typedef name) is not a member.
        if terminator == '}':
            break

    return result
def ParseFunctionDeclaration(declaration, typefunc, namefunc):
    """Parse a function declaration.

    This function takes a function declaration and breaks it into individual
    parameter declarations. Every parameter has to be terminated by a ','
    or a newline to be recognized.

    Args:
      declaration (str): the declaration to parse
      typefunc (func): function to apply to type; called with the bare type
                       name and the type wrapped in <type></type>
      namefunc (func): function to apply to name; called with the formatted
                       parameter declaration

    Returns:
      dict: map of (symbol, decl) pairs describing the prototype
    """
    # allow alphanumerics, '_', '[' & ']' in param names, try to match a standard parameter
    param_regex = (
        r'^\s*'
        r'((?:(?:G_CONST_RETURN|G_GNUC_[A-Z_]+\s+|unsigned long|unsigned short|'
        r'signed long|signed short|unsigned|signed|long|short|volatile|const)\s+)*)'  # $1 pre
        r'((?:struct\b|enum\b)?\s*\w+)\s*'                                             # $2 type
        r'((?:(?:const\b|restrict\b|G_GNUC_[A-Z_]+\b)?\s*\*?\s*'
        r'(?:const\b|restrict\b|G_GNUC_[A-Z_]+\b)?\s*)*)'                              # $3 ptr
        r'(\w+)?\s*'                                                                   # $4 name
        r'((?:\[\S*\])*)\s*'                                                           # $5 array
        r'(?:G_GNUC_[A-Z_]+)?\s*[,\n]'
    )
    # Parameters which are function pointers
    func_param_regex = (
        r'^(const\s+|G_CONST_RETURN\s+|G_GNUC_[A-Z_]+\s+|signed\s+|unsigned\s+)*'     # $1
        r'(struct\s+)?'                                                                # $2
        r'(\w+)\s*'                                                                    # $3 type
        r'(\**)\s*(?:restrict\b)?\s*'                                                  # $4 ptr1
        r'(const\s+)?'                                                                 # $5 mod2
        r'\(\s*(\*[\s\*]*)\s*(\w+)\s*\)\s*'                                            # $6 func_ptr, $7 name
        r'\(([^)]*)\)\s*[,\n]'                                                         # $8 func_params
    )

    result = OrderedDict()

    param_num = 0
    while declaration:
        logging.debug('decl=[%s]', declaration)

        # skip whitespace and commas
        declaration, n = re.subn(r'^[\s,]+', '', declaration)
        if n:
            continue

        declaration, n = re.subn(r'^void\s*[,\n]', '', declaration)
        if n:
            if param_num != 0:
                logging.warning('void used as parameter %d in function %s', param_num, declaration)
            result['void'] = namefunc('<type>void</type>')
            param_num += 1
            continue

        declaration, n = re.subn(r'^\s*[_a-zA-Z0-9]*\.\.\.\s*[,\n]', '', declaration)
        if n:
            result['...'] = namefunc('...')
            param_num += 1
            continue

        m = re.match(param_regex, declaration)
        if m:
            # The pattern is anchored at the start, so dropping the matched
            # prefix is all that is needed to consume the parameter.
            declaration = declaration[m.end():]

            pre = m.group(1) or ''
            ptype = m.group(2)
            ptr = m.group(3) or ''
            name = m.group(4) or ''
            array = m.group(5) or ''

            pre = re.sub(r'\s+', ' ', pre)
            ptype = re.sub(r'\s+', ' ', ptype)
            ptr = re.sub(r'\s+', ' ', ptr)
            ptr = re.sub(r'\s+$', '', ptr)
            if ptr and not ptr.endswith('*'):
                ptr += ' '

            logging.debug('"%s" "%s" "%s" "%s" "%s"', pre, ptype, ptr, name, array)

            # For e.g. 'unsigned foo' the modifier is the actual type and the
            # word matched as type is the parameter name.
            signed_match = re.search(r'^((un)?signed .*)\s?', pre)
            if name == '' and signed_match:
                name = ptype
                ptype = signed_match.group(1)
                pre = ''

            # Unnamed parameters get a positional placeholder name
            if name == '':
                name = 'Param' + str(param_num + 1)

            logging.debug('"%s" "%s" "%s" "%s" "%s"', pre, ptype, ptr, name, array)

            xref = typefunc(ptype, '<type>%s</type>' % ptype)
            result[name] = namefunc('%s%s %s%s%s' % (pre, xref, ptr, name, array))
            param_num += 1
            continue

        # Try to match parameters which are functions
        m = re.match(func_param_regex, declaration)
        if m:
            declaration = declaration[m.end():]

            mod1 = m.group(1) or ''
            if m.group(2):
                mod1 += m.group(2)
            ptype = m.group(3)
            ptr1 = m.group(4)
            mod2 = m.group(5) or ''
            func_ptr = m.group(6)
            name = m.group(7)
            func_params = m.group(8) or ''

            if ptr1 and not ptr1.endswith('*'):
                ptr1 += ' '
            func_ptr = re.sub(r'\s+', ' ', func_ptr)

            logging.debug('"%s" "%s" "%s" "%s" "%s"', mod1, ptype, mod2, func_ptr, name)

            xref = typefunc(ptype, '<type>%s</type>' % ptype)
            result[name] = namefunc('%s%s%s%s (%s%s) (%s)' % (mod1, xref, ptr1, mod2, func_ptr, name, func_params))
            param_num += 1
            continue

        # Nothing matched: give up instead of looping forever on the same text
        logging.warning('Cannnot parse args for function in "%s"', declaration)
        break

    return result
def ParseMacroDeclaration(declaration, namefunc):
    """Parse a macro declaration.

    This function takes a macro declaration and breaks it into individual
    parameter declarations.

    Args:
      declaration (str): the declaration to parse
      namefunc (func): function to apply to name

    Returns:
      dict: map of (symbol, decl) pairs describing the macro; empty if the
            declaration is not a function-like macro
    """
    result = OrderedDict()

    logging.debug('decl=[%s]', declaration)

    m = re.search(r'^\s*#\s*define\s+\w+\(([^\)]*)\)', declaration)
    if m:
        params = m.group(1)
        # The parameter list may be spread over several lines
        params = re.sub(r'\n', '', params)

        logging.debug('params=[%s]', params)

        for param in params.split(','):
            param = param.strip()

            # Allow varargs variations
            if param.endswith('...'):
                param = '...'

            # 'FOO()' yields a single empty entry which is no parameter
            if param != '':
                result[param] = namefunc(param)

    return result