3 # gtk-doc - GTK DocBook documentation generator.
4 # Copyright (C) 2001 Damon Chaplin
5 # 2007-2016 Stefan Sauer
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 # Support both Python 2 and 3
23 from __future__
import print_function
25 from collections
import OrderedDict
def open_text(filename, mode='r', encoding='utf-8'):
    """An open() which removes some differences between Python 2 and 3.

    Unlike the builtin open(), utf-8 is used by default and not the locale
    encoding (which is ANSI on Windows for example, not very helpful).

    For Python 2, files are opened in text mode like with Python 3.

    Args:
      filename (str): Path of the file to open.
      mode (str): Either 'r' (read) or 'w' (write).
      encoding (str): Text encoding of the file, utf-8 by default.

    Returns:
      file: The opened text file object.

    Raises:
      ValueError: If mode is neither 'r' nor 'w'.
    """
    # Imported locally so the interpreter check below does not rely on the
    # module-level import list.
    import sys

    if mode not in ('r', 'w'):
        raise ValueError("mode %r not supported, must be 'r' or 'w'" % mode)

    if sys.version_info[0] >= 3:
        return open(filename, mode, encoding=encoding)

    # We can't use io.open() here as its write method is too strict and
    # only allows unicode instances and not everything in the codebase
    # forces unicode at the moment. codecs.open() on the other hand
    # happily takes ASCII str and decodes it.
    return codecs.open(filename, mode, encoding=encoding)
61 """Check GTKDOC_TRACE environment variable.
63 Set python log level to the value of the environment variable (DEBUG, INFO,
64 WARNING, ERROR and CRITICAL) or INFO if the environment variable is empty.
66 log_level
= os
.environ
.get('GTKDOC_TRACE')
70 logging
.basicConfig(stream
=sys
.stdout
,
71 level
=logging
.getLevelName(log_level
.upper()),
72 format
='%(asctime)s:%(filename)s:%(funcName)s:%(lineno)d:%(levelname)s:%(message)s')
73 # When redirecting the output on python2 or if run with a non utf-8 locale
74 # we get UnicodeEncodeError:
75 encoding
= sys
.stdout
.encoding
76 if 'PYTHONIOENCODING' not in os
.environ
and (not encoding
or encoding
!= 'UTF-8'):
79 sys
.stdout
= open(sys
.stdout
.fileno(), mode
='w', encoding
='utf8', buffering
=1)
82 sys
.stdout
= codecs
.getwriter('utf8')(sys
.stdout
)
def UpdateFileIfChanged(old_file, new_file, make_backup):
    """Replace old_file with new_file, but only if the content differs.

    Compares the old version of the file with the new version and if the
    file has changed it moves the new version into the old version's place.
    This is used so we only change files if needed, so we can do proper
    dependency tracking.

    Args:
      old_file (str): The pathname of the old file.
      new_file (str): The pathname of the new version of the file.
      make_backup (bool): True if a backup of the old file should be kept.
                          It will have the .bak suffix added to the file name.

    Returns:
      bool: It returns False if the file hasn't changed, and True if it has.
    """
    logging.debug("Comparing %s with %s...", old_file, new_file)

    if os.path.exists(old_file):
        # Compare as bytes so no text decoding can influence the result.
        with open(old_file, 'rb') as f:
            old_contents = f.read()
        with open(new_file, 'rb') as f:
            new_contents = f.read()

        if old_contents == new_contents:
            # NOTE(review): reconstructed - the redundant new file is
            # discarded so that it does not linger next to the old one.
            os.unlink(new_file)
            logging.debug("-> content is the same.")
            return False

        if make_backup:
            backupname = old_file + '.bak'
            if os.path.exists(backupname):
                os.unlink(backupname)
            os.rename(old_file, backupname)
        else:
            # NOTE(review): reconstructed - os.rename() refuses to overwrite
            # an existing destination on Windows, so remove it first.
            os.unlink(old_file)
        logging.debug("-> content differs.")
    else:
        logging.debug("-> %s created.", old_file)

    os.rename(new_file, old_file)
    return True
def GetModuleDocDir(module_name):
    """Get the docdir for the given module via pkg-config.

    Args:
      module_name (string): The module, e.g. 'glib-2.0'

    Returns:
      str: the doc directory or None
    """
    try:
        # Ask pkg-config for the install prefix of the module.
        prefix = subprocess.check_output(
            [config.pkg_config, '--variable=prefix', module_name],
            universal_newlines=True)
    except subprocess.CalledProcessError:
        # pkg-config exited with an error status for this module.
        return None
    return os.path.join(prefix.strip(), 'share/gtk-doc/html')
def LogWarning(filename, line, message):
    """Log a warning in gcc style format.

    Args:
      filename (str): The file the warning comes from; "unknown" is printed
                      when it is empty or None.
      line (int): line number in the file
      message (str): the warning message to print
    """
    filename = filename or "unknown"

    # TODO: write to stderr
    print("%s:%d: warning: %s" % (filename, line, message))
def CreateValidSGMLID(xml_id):
    """Creates a valid SGML 'id' from the given string.

    According to http://www.w3.org/TR/html4/types.html#type-id "ID and NAME
    tokens must begin with a letter ([A-Za-z]) and may be followed by any number
    of letters, digits ([0-9]), hyphens ("-"), underscores ("_"), colons (":"),
    and periods (".")."

    When creating SGML IDS, we append ":CAPS" to all all-caps identifiers to
    prevent name clashes (SGML ids are case-insensitive). (It basically never is
    the case that mixed-case identifiers would collide.)

    Args:
      xml_id (str): The text to be converted into a valid SGML id.

    Returns:
      str: The converted id.
    """
    # Special case, '_' would end up as '' so we use 'gettext-macro' instead.
    if xml_id == '_':
        return "gettext-macro"

    xml_id = re.sub(r'[,;]', '', xml_id)
    xml_id = re.sub(r'[_ ]', '-', xml_id)
    xml_id = re.sub(r'^-+', '', xml_id)
    xml_id = xml_id.replace('::', '-')
    xml_id = xml_id.replace(':', '--')

    # Append ":CAPS" to all all-caps identifiers
    # FIXME: there are some inconsistencies here, we have index files containing e.g. TRUE--CAPS
    if xml_id.isupper() and not xml_id.endswith('-CAPS'):
        xml_id += ':CAPS'

    return xml_id
197 # Parsing helpers (move to mkdb ?)
class ParseError(Exception):
    """Raised by the parsing helpers when a declaration cannot be parsed."""
def PreprocessStructOrEnum(declaration):
    """Trim a type declaration for display.

    Removes private sections and comments from the declaration.

    Args:
      declaration (str): the type declaration (struct or enum)

    Returns:
      str: the trimmed declaration
    """
    multiline = re.MULTILINE | re.DOTALL

    # Remove private symbols
    # Assume end of declaration if line begins with '}'
    declaration = re.sub(
        r'\n?[ \t]*/\*\s*<\s*(private|protected)\s*>\s*\*/.*?(?:/\*\s*<\s*public\s*>\s*\*/|(?=^\}))',
        '', declaration, flags=multiline)

    # Remove all other comments
    # Block comments on lines of their own: drop the whole line.
    declaration = re.sub(r'\n\s*/\*.*?\*/\s*\n', r'\n', declaration, flags=multiline)
    # Remaining (inline) block comments: collapse to a single space.
    declaration = re.sub(r'/\*([^*]+|\*(?!/))*\*/', r' ', declaration)
    # '//' comments on lines of their own: drop the whole line.
    declaration = re.sub(r'\n\s*//.*?\n', r'\n', declaration, flags=multiline)
    # Trailing '//' comments: cut up to the end of the line.
    declaration = re.sub(r'//.*', '', declaration)

    return declaration
228 # TODO: output_function_params is always passed as 0
229 # TODO: we always pass both functions
230 def ParseStructDeclaration(declaration
, is_object
, output_function_params
, typefunc
=None, namefunc
=None):
231 """ Parse a struct declaration.
233 Takes a structure declaration and breaks it into individual type declarations.
236 declaration (str): the declaration to parse
237 is_object (bool): true if this is an object structure
238 output_function_params (bool): true if full type is wanted for function pointer members
239 typefunc (func): function to apply to type
240 namefunc (func): function to apply to name
243 dict: map of (symbol, decl) pairs describing the public declaration
246 # For forward struct declarations just return an empty array.
247 if re
.search(r
'(?:struct|union)\s+\S+\s*;', declaration
, flags
=re
.MULTILINE | re
.DOTALL
):
250 # Remove all private parts of the declaration
251 # For objects, assume private
253 declaration
= re
.sub(r
'''((?:struct|union)\s+\w*\s*\{)
255 (?:/\*\s*<\s*public\s*>\s*\*/|(?=\}))''',
256 r
'\1', declaration
, flags
=re
.MULTILINE | re
.DOTALL | re
.VERBOSE
)
258 # Remove g_iface, parent_instance and parent_class if they are first member
259 declaration
= re
.sub(r
'(\{)\s*(\w)+\s+(g_iface|parent_instance|parent_class)\s*;', r
'\1', declaration
)
261 declaration
= PreprocessStructOrEnum(declaration
)
263 if declaration
.strip() == '':
266 # Prime match after "struct/union {" declaration
267 match
= re
.search(r
'(?:struct|union)\s+\w*\s*\{', declaration
, flags
=re
.MULTILINE | re
.DOTALL
)
269 raise ParseError('Declaration "%s" does not begin with "struct/union [NAME] {"' % declaration
)
271 logging
.debug('public fields in struct/union: %s', declaration
)
273 result
= OrderedDict()
275 # Treat lines in sequence, allowing singly nested anonymous structs and unions.
276 for m
in re
.finditer(r
'\s*([^{;]+(\{[^\}]*\}[^{;]+)?);', declaration
[match
.end():], flags
=re
.MULTILINE | re
.DOTALL
):
279 logging
.debug('checking "%s"', line
)
281 if re
.search(r
'^\s*\}\s*\w*\s*$', line
):
284 # FIXME: Just ignore nested structs and unions for now
288 # ignore preprocessor directives
289 line
= re
.sub(r
'^#.*?\n\s*', '', line
, flags
=re
.MULTILINE | re
.DOTALL
)
291 if re
.search(r
'^\s*\}\s*\w*\s*$', line
):
294 func_match
= re
.search(r
'''^
295 (const\s+|G_CONST_RETURN\s+|unsigned\s+|signed\s+|long\s+|short\s+)*(struct\s+|enum\s+)? # mod1
297 (\**(?:\s*restrict)?)\s* # ptr1
301 \(\s*\*\s*(\w+)\s*\)\s* # name
302 \(([^)]*)\)\s* # func_params
303 $''', line
, flags
=re
.VERBOSE
)
304 vars_match
= re
.search(r
'''^
305 ((?:const\s+|volatile\s+|unsigned\s+|signed\s+|short\s+|long\s+)?)(struct\s+|enum\s+)? # mod1
307 (\** \s* const\s+)? # mod2
309 $''', line
, flags
=re
.VERBOSE
)
311 # Try to match structure members which are functions
313 mod1
= func_match
.group(1) or ''
314 if func_match
.group(2):
315 mod1
+= func_match
.group(2)
316 func_type
= func_match
.group(3)
317 ptr1
= func_match
.group(4)
318 mod2
= func_match
.group(5) or ''
319 ptr2
= func_match
.group(6)
320 mod3
= func_match
.group(7) or ''
321 name
= func_match
.group(8)
322 func_params
= func_match
.group(9)
325 ptype
= typefunc(func_type
, '<type>%s</type>' % func_type
)
328 pname
= namefunc(name
)
330 if output_function_params
:
331 result
[name
] = '%s%s%s%s%s%s (*%s) (%s)' % (
332 mod1
, ptype
, ptr1
, mod2
, ptr2
, mod3
, pname
, func_params
)
334 result
[name
] = '%s ()' % pname
336 # Try to match normal struct fields of comma-separated variables/
338 mod1
= vars_match
.group(1) or ''
339 if vars_match
.group(2):
340 mod1
+= vars_match
.group(2)
341 vtype
= vars_match
.group(3)
344 ptype
= typefunc(vtype
, '<type>%s</type>' % vtype
)
345 mod2
= vars_match
.group(4) or ''
348 var_list
= vars_match
.group(5)
350 logging
.debug('"%s" "%s" "%s" "%s"', mod1
, vtype
, mod2
, var_list
)
352 mod1
= mod1
.replace(' ', ' ')
353 mod2
= mod2
.replace(' ', ' ')
355 for n
in var_list
.split(','):
356 # Each variable can have any number of '*' before the identifier,
357 # and be followed by any number of pairs of brackets or a bit field specifier.
358 # e.g. *foo, ***bar, *baz[12][23], foo : 25.
360 r
'^\s* (\**(?:\s*restrict\b)?) \s* (\w+) \s* (?: ((?:\[[^\]]*\]\s*)+) | (:\s*\d+)?) \s* $',
365 array
= m
.group(3) or ''
371 if ptrs
and not ptrs
.endswith('*'):
374 array
= array
.replace(' ', ' ')
375 bits
= bits
.replace(' ', ' ')
379 pname
= namefunc(name
)
381 result
[name
] = '%s%s%s %s%s%s%s;' % (mod1
, ptype
, mod2
, ptrs
, pname
, array
, bits
)
383 logging
.debug('Matched line: %s%s%s %s%s%s%s', mod1
, ptype
, mod2
, ptrs
, pname
, array
, bits
)
385 logging
.warning('Cannot parse struct field: "%s"', n
)
388 logging
.warning('Cannot parse structure field: "%s"', line
)
def ParseEnumDeclaration(declaration):
    """Parse an enum declaration.

    This function takes an enumeration declaration and breaks it into
    individual enum member declarations.

    Args:
      declaration (str): the declaration to parse

    Returns:
      list: list of strings naming the public enum members, in declaration
            order; empty for forward declarations and empty declarations.

    Raises:
      ParseError: if the declaration does not start with an enum header.
    """
    multiline = re.MULTILINE | re.DOTALL

    # For forward enum declarations just return an empty array.
    if re.search(r'enum\s+\S+\s*;', declaration, flags=multiline):
        return []

    declaration = PreprocessStructOrEnum(declaration)

    if declaration.strip() == '':
        return []

    result = []

    # Remove parenthesized expressions (in macros like GTK_BLAH = BLAH(1,3))
    # to avoid getting confused by commas they might contain. This doesn't
    # handle nested parentheses correctly.
    declaration = re.sub(r'\([^)\n]+\)', '', declaration)

    # Remove apostrophed characters (e.g. '}' or ',') values to avoid getting
    # confused with end of enumeration.
    # See https://bugzilla.gnome.org/show_bug.cgi?id=741305
    declaration = re.sub(r'\'.\'', '', declaration)

    # Remove comma from comma - possible whitespace - closing brace sequence
    # since it is legal in GNU C and C99 to have a trailing comma but doesn't
    # result in an actual enum member
    declaration = re.sub(r',(\s*})', r'\1', declaration)

    # Prime match after "typedef enum {" declaration
    match = re.search(r'(typedef\s+)?enum\s*(\S+\s*)?\{', declaration, flags=multiline)
    if match is None:
        raise ParseError('Enum declaration "%s" does not begin with "typedef enum {" or "enum [NAME] {"' % declaration)

    logging.debug("public fields in enum: %s'", declaration)

    # Treat lines in sequence.
    for m in re.finditer(r'\s*([^,\}]+)([,\}])', declaration[match.end():], flags=multiline):
        line = m.group(1)
        terminator = m.group(2)

        # ignore preprocessor directives
        line = re.sub(r'^#.*?\n\s*', '', line, flags=multiline)

        m1 = re.search(r'^(\w+)\s*(=.*)?$', line, flags=multiline)
        # Special case for GIOCondition, where the values are specified by
        # macros which expand to include the equal sign like '=1'.
        m2 = re.search(r'^(\w+)\s*GLIB_SYSDEF_POLL', line, flags=multiline)
        if m1:
            result.append(m1.group(1))
        elif m2:
            result.append(m2.group(1))
        elif line.strip().startswith('#'):
            # Special case include of <gdk/gdkcursors.h>, just ignore it
            # Special case for #ifdef/#else/#endif, just ignore it
            pass
        else:
            logging.warning('Cannot parse enumeration member: %s', line)

        # The closing brace ends the member list; anything after it (the
        # typedef name) is not an enum member.
        if terminator == '}':
            break

    return result
468 def ParseFunctionDeclaration(declaration
, typefunc
, namefunc
):
469 """Parse a function declaration.
471 This function takes a function declaration and breaks it into individual
472 parameter declarations.
475 declaration (str): the declaration to parse
476 typefunc (func): function to apply to type
477 namefunc (func): function to apply to name
480 dict: map of (symbol, decl) pairs describing the prototype
483 result
= OrderedDict()
487 logging
.debug('decl=[%s]', declaration
)
489 # skip whitespace and commas
490 declaration
, n
= re
.subn(r
'^[\s,]+', '', declaration
)
494 declaration
, n
= re
.subn(r
'^void\s*[,\n]', '', declaration
)
497 logging
.warning('void used as parameter %d in function %s', param_num
, declaration
)
498 result
['void'] = namefunc('<type>void</type>')
502 declaration
, n
= re
.subn(r
'^\s*[_a-zA-Z0-9]*\.\.\.\s*[,\n]', '', declaration
)
504 result
['...'] = namefunc('...')
508 # allow alphanumerics, '_', '[' & ']' in param names, try to match a standard parameter
510 regex
= r
'^\s*((?:(?:G_CONST_RETURN|G_GNUC_[A-Z_]+\s+|unsigned long|unsigned short|signed long|signed short|unsigned|signed|long|short|volatile|const)\s+)*)((?:struct\b|enum\b)?\s*\w+)\s*((?:(?:const\b|restrict\b|G_GNUC_[A-Z_]+\b)?\s*\*?\s*(?:const\b|restrict\b|G_GNUC_[A-Z_]+\b)?\s*)*)(\w+)?\s*((?:\[\S*\])*)\s*(?:G_GNUC_[A-Z_]+)?\s*[,\n]'
511 m
= re
.match(regex
, declaration
)
513 declaration
= re
.sub(regex
, '', declaration
)
515 pre
= m
.group(1) or ''
517 ptr
= m
.group(3) or ''
518 name
= m
.group(4) or ''
519 array
= m
.group(5) or ''
521 pre
= re
.sub(r
'\s+', ' ', pre
)
522 type = re
.sub(r
'\s+', ' ', type)
523 ptr
= re
.sub(r
'\s+', ' ', ptr
)
524 ptr
= re
.sub(r
'\s+$', '', ptr
)
525 if ptr
and not ptr
.endswith('*'):
528 logging
.debug('"%s" "%s" "%s" "%s" "%s"', pre
, type, ptr
, name
, array
)
530 m
= re
.search(r
'^((un)?signed .*)\s?', pre
)
537 name
= 'Param' + str(param_num
+ 1)
539 logging
.debug('"%s" "%s" "%s" "%s" "%s"', pre
, type, ptr
, name
, array
)
541 xref
= typefunc(type, '<type>%s</type>' % type)
542 result
[name
] = namefunc('%s%s %s%s%s' % (pre
, xref
, ptr
, name
, array
))
546 # Try to match parameters which are functions
547 # $1 $2 $3 $4 $5 $6 $7 $8
548 regex
= r
'^(const\s+|G_CONST_RETURN\s+|G_GNUC_[A-Z_]+\s+|signed\s+|unsigned\s+)*(struct\s+)?(\w+)\s*(\**)\s*(?:restrict\b)?\s*(const\s+)?\(\s*(\*[\s\*]*)\s*(\w+)\s*\)\s*\(([^)]*)\)\s*[,\n]'
549 m
= re
.match(regex
, declaration
)
551 declaration
= re
.sub(regex
, '', declaration
)
553 mod1
= m
.group(1) or ''
558 mod2
= m
.group(5) or ''
559 func_ptr
= m
.group(6)
561 func_params
= m
.group(8) or ''
563 if ptr1
and not ptr1
.endswith('*'):
565 func_ptr
= re
.sub(r
'\s+', ' ', func_ptr
)
567 logging
.debug('"%s" "%s" "%s" "%s" "%s"', mod1
, type, mod2
, func_ptr
, name
)
569 xref
= typefunc(type, '<type>%s</type>' % type)
570 result
[name
] = namefunc('%s%s%s%s (%s%s) (%s)' % (mod1
, xref
, ptr1
, mod2
, func_ptr
, name
, func_params
))
574 logging
.warning('Cannnot parse args for function in "%s"', declaration
)
580 def ParseMacroDeclaration(declaration
, namefunc
):
581 """Parse a macro declaration.
583 This function takes a macro declaration and breaks it into individual
584 parameter declarations.
587 declaration (str): the declaration to parse
588 namefunc (func): function to apply to name
591 dict: map of (symbol, decl) pairs describing the macro
594 result
= OrderedDict()
596 logging
.debug('decl=[%s]', declaration
)
598 m
= re
.search(r
'^\s*#\s*define\s+\w+\(([^\)]*)\)', declaration
)
601 params
= re
.sub(r
'\n', '', params
)
603 logging
.debug('params=[%s]', params
)
605 for param
in params
.split(','):
606 param
= param
.strip()
608 # Allow varargs variations
609 if param
.endswith('...'):
613 result
[param
] = namefunc(param
)