fixxref: update for the index.sgml -> devhelp2 change
[gtk-doc.git] / gtkdoc / common.py
blob9dfebef2a76f43d179c6074cbe647e2d42d14d70
1 # -*- python -*-
3 # gtk-doc - GTK DocBook documentation generator.
4 # Copyright (C) 2001 Damon Chaplin
5 # 2007-2016 Stefan Sauer
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 # Support both Python 2 and 3
23 from __future__ import print_function
25 from collections import OrderedDict
26 import logging
27 import os
28 import re
29 import subprocess
30 import sys
31 import six
32 import codecs
34 from . import config
def open_text(filename, mode="r", encoding="utf-8"):
    """Open a text file the same way on Python 2 and Python 3.

    Unlike the builtin open(), utf-8 is used by default and not the locale
    encoding (which is ANSI on Windows for example, not very helpful).

    For Python 2, files are opened in text mode like with Python 3.

    Args:
      filename (str): path of the file to open
      mode (str): 'r' for reading or 'w' for writing
      encoding (str): the text encoding to use

    Returns:
      file: the opened file object

    Raises:
      ValueError: if mode is neither 'r' nor 'w'
    """

    if mode != "r" and mode != "w":
        raise ValueError("mode %r not supported, must be 'r' or 'w'" % mode)

    if not six.PY3:
        # io.open() is not usable here: its write method only accepts
        # unicode instances and not everything in the codebase forces
        # unicode at the moment. codecs.open() happily takes ASCII str and
        # decodes it.
        return codecs.open(filename, mode, encoding=encoding)

    return open(filename, mode, encoding=encoding)
def setup_logging():
    """Configure logging from the GTKDOC_TRACE environment variable.

    The python log level is set to the value of the environment variable
    (DEBUG, INFO, WARNING, ERROR and CRITICAL, in any letter case) or to INFO
    if the variable is set but empty. When the variable is not set at all,
    logging is left unconfigured.
    """
    level_name = os.environ.get('GTKDOC_TRACE')
    if level_name is not None:
        # A variable that is set but empty selects INFO.
        level_name = level_name or 'INFO'
        level = logging.getLevelName(level_name.upper())
        logging.basicConfig(
            stream=sys.stdout,
            level=level,
            format='%(asctime)s:%(filename)s:%(funcName)s:%(lineno)d:%(levelname)s:%(message)s')

    # When redirecting the output on python2 we get UnicodeEncodeError:
    if not sys.stdout.encoding:
        import codecs
        sys.stdout = codecs.getwriter('utf8')(sys.stdout)
def UpdateFileIfChanged(old_file, new_file, make_backup):
    """Replace old_file with new_file, but only if the content differs.

    Compares the old version of the file with the new version and if the
    file has changed it moves the new version into the old version's place.
    This is used so we only change files if needed, so we can do proper
    dependency tracking. In every case new_file no longer exists afterwards:
    it is either renamed to old_file or deleted.

    Args:
      old_file (str): The pathname of the old file.
      new_file (str): The pathname of the new version of the file.
      make_backup (bool): True if a backup of the old file should be kept.
                          It will have the .bak suffix added to the file name.

    Returns:
      bool: It returns False if the file hasn't changed, and True if it has.
    """

    logging.debug("Comparing %s with %s...", old_file, new_file)

    if os.path.exists(old_file):
        # Read through context managers so both handles are closed before the
        # files are unlinked or renamed below.
        with open(old_file, 'rb') as old_handle:
            old_contents = old_handle.read()
        with open(new_file, 'rb') as new_handle:
            new_contents = new_handle.read()

        if old_contents == new_contents:
            os.unlink(new_file)
            logging.debug("-> content is the same.")
            return False

        if make_backup:
            backupname = old_file + '.bak'
            # Remove a stale backup first, then move the old file aside.
            if os.path.exists(backupname):
                os.unlink(backupname)
            os.rename(old_file, backupname)
        else:
            os.unlink(old_file)
        logging.debug("-> content differs.")
    else:
        logging.debug("-> %s created.", old_file)

    os.rename(new_file, old_file)
    return True
def GetModuleDocDir(module_name):
    """Get the docdir for the given module via pkg-config

    Queries the 'prefix' variable of the module and appends
    'share/gtk-doc/html' to it.

    Args:
      module_name (string): The module, e.g. 'glib-2.0'

    Returns:
      str: the doc directory or None if pkg-config fails, cannot be run or
           reports no prefix for the module
    """
    try:
        prefix = subprocess.check_output(
            [config.pkg_config, '--variable=prefix', module_name],
            universal_newlines=True)
    except (subprocess.CalledProcessError, OSError):
        # CalledProcessError: pkg-config exited with an error.
        # OSError: the pkg-config binary itself could not be executed.
        return None

    prefix = prefix.strip()
    if not prefix:
        # pkg-config succeeded but printed nothing; joining an empty prefix
        # would yield a bogus relative path.
        return None
    return os.path.join(prefix, 'share/gtk-doc/html')
def LogWarning(filename, line, message):
    """Log a warning in gcc style format

    The warning is printed to stdout as '<filename>:<line>: warning: <message>'.

    Args:
      filename (str): The file the error comes from, "unknown" is used if empty
      line (int): line number in the file
      message (str): the error message to print
    """
    if not filename:
        filename = "unknown"

    # TODO: write to stderr
    text = "%s:%d: warning: %s" % (filename, line, message)
    print(text)
def CreateValidSGMLID(xml_id):
    """Creates a valid SGML 'id' from the given string.

    According to http://www.w3.org/TR/html4/types.html#type-id "ID and NAME
    tokens must begin with a letter ([A-Za-z]) and may be followed by any number
    of letters, digits ([0-9]), hyphens ("-"), underscores ("_"), colons (":"),
    and periods (".")."

    When creating SGML IDS, we append ":CAPS" to all all-caps identifiers to
    prevent name clashes (SGML ids are case-insensitive). (It basically never is
    the case that mixed-case identifiers would collide.)

    Args:
      xml_id (str): The text to be converted into a valid SGML id.

    Returns:
      str: The converted id.
    """

    # Special case, '_' would end up as '' so we use 'gettext-macro' instead.
    if xml_id == '_':
        return "gettext-macro"

    # The rewrites are applied in this order: drop ',' and ';', turn '_' and
    # ' ' into '-', strip leading '-', then map '::' to '-' and ':' to '--'.
    for pattern, replacement in ((r'[,;]', ''), (r'[_ ]', '-'), (r'^-+', '')):
        xml_id = re.sub(pattern, replacement, xml_id)
    xml_id = xml_id.replace('::', '-').replace(':', '--')

    # Append ":CAPS" to all all-caps identifiers
    # FIXME: there are some inconsistencies here, we have index files containing e.g. TRUE--CAPS
    if not xml_id.isupper() or xml_id.endswith('-CAPS'):
        return xml_id
    return xml_id + ':CAPS'
188 # Parsing helpers (move to mkdb ?)
class ParseError(Exception):
    """Raised by the parsing helpers when a declaration has an unexpected form."""
def PreprocessStructOrEnum(declaration):
    """Trim a type declaration for display.

    Removes private sections and comments from the declaration. A private
    section starts at a '/*< private >*/' or '/*< protected >*/' marker and
    runs up to the next '/*< public >*/' marker or, if there is none, up to a
    line that begins with '}'.

    Args:
      declaration (str): the type declaration (struct or enum)

    Returns:
      str: the trimmed declaration
    """
    spanning = re.MULTILINE | re.DOTALL

    # Remove private symbols
    # Assume end of declaration if line begins with '}'
    declaration = re.sub(
        r'\n?[ \t]*/\*\s*<\s*(private|protected)\s*>\s*\*/.*?(?:/\*\s*<\s*public\s*>\s*\*/|(?=^\}))',
        '', declaration, flags=spanning)

    # Remove all other comments. The order matters: comments that occupy
    # whole lines are dropped together with their line first, then the
    # inline variants.
    comment_rules = (
        (r'\n\s*/\*.*?\*/\s*\n', r'\n', spanning),
        (r'/\*([^*]+|\*(?!/))*\*/', r' ', 0),
        (r'\n\s*//.*?\n', r'\n', spanning),
        (r'//.*', '', 0),
    )
    for pattern, replacement, flags in comment_rules:
        declaration = re.sub(pattern, replacement, declaration, flags=flags)

    return declaration
219 # TODO: output_function_params is always passed as 0
220 # TODO: we always pass both functions
def ParseStructDeclaration(declaration, is_object, output_function_params, typefunc=None, namefunc=None):
    """Parse a struct declaration.

    Takes a structure declaration and breaks it into individual type
    declarations. Private sections and comments are removed first. Nested
    structs and unions are currently skipped.

    Args:
      declaration (str): the declaration to parse
      is_object (bool): true if this is an object structure; members before
                        the first '/*< public >*/' marker are then treated
                        as private
      output_function_params (bool): true if full type is wanted for function pointer members
      typefunc (func): function to apply to type, called as
                       typefunc(type, '<type>type</type>')
      namefunc (func): function to apply to name

    Returns:
      dict: map of (symbol, decl) pairs describing the public declaration,
            in declaration order; empty for forward declarations

    Raises:
      ParseError: if the declaration does not begin with "struct/union [NAME] {"
    """

    # For forward struct declarations just return an empty array.
    if re.search(r'(?:struct|union)\s+\S+\s*;', declaration, flags=re.MULTILINE | re.DOTALL):
        return {}

    # Remove all private parts of the declaration
    # For objects, assume private: drop everything between the opening brace
    # and the first public marker (or the closing brace if there is none).
    if is_object:
        declaration = re.sub(r'''((?:struct|union)\s+\w*\s*\{)
                                 .*?
                                 (?:/\*\s*<\s*public\s*>\s*\*/|(?=\}))''',
                             r'\1', declaration, flags=re.MULTILINE | re.DOTALL | re.VERBOSE)

    # Remove g_iface, parent_instance and parent_class if they are first member
    declaration = re.sub(r'(\{)\s*(\w)+\s+(g_iface|parent_instance|parent_class)\s*;', r'\1', declaration)

    declaration = PreprocessStructOrEnum(declaration)

    if declaration.strip() == '':
        return {}

    # Prime match after "struct/union {" declaration
    match = re.search(r'(?:struct|union)\s+\w*\s*\{', declaration, flags=re.MULTILINE | re.DOTALL)
    if not match:
        raise ParseError('Declaration "%s" does not begin with "struct/union [NAME] {"' % declaration)

    logging.debug('public fields in struct/union: %s', declaration)

    result = OrderedDict()

    # Treat lines in sequence, allowing singly nested anonymous structs and unions.
    for stmt in re.finditer(r'\s*([^{;]+(\{[^\}]*\}[^{;]+)?);', declaration[match.end():], flags=re.MULTILINE | re.DOTALL):
        line = stmt.group(1)

        logging.debug('checking "%s"', line)

        # The closing brace (optionally followed by a name) ends the struct.
        if re.search(r'^\s*\}\s*\w*\s*$', line):
            break

        # FIXME: Just ignore nested structs and unions for now
        if '{' in line:
            continue

        # ignore preprocessor directives
        line = re.sub(r'^#.*?\n\s*', '', line, flags=re.MULTILINE | re.DOTALL)

        # Check again: the closing brace may only be visible once the
        # preprocessor directives in front of it are gone.
        if re.search(r'^\s*\}\s*\w*\s*$', line):
            break

        func_match = re.search(r'''^
                 (const\s+|G_CONST_RETURN\s+|unsigned\s+|signed\s+|long\s+|short\s+)*(struct\s+|enum\s+)?  # mod1
                 (\w+)\s*                             # type
                 (\**(?:\s*restrict)?)\s*             # ptr1
                 (const\s+)?                          # mod2
                 (\**\s*)                             # ptr2
                 (const\s+)?                          # mod3
                 \(\s*\*\s*(\w+)\s*\)\s*              # name
                 \(([^)]*)\)\s*                       # func_params
                 $''', line, flags=re.VERBOSE)
        vars_match = re.search(r'''^
                 ((?:const\s+|volatile\s+|unsigned\s+|signed\s+|short\s+|long\s+)?)(struct\s+|enum\s+)? # mod1
                 (\w+)\s*                            # type
                 (\** \s* const\s+)?                 # mod2
                 (.*)                                # variables
                 $''', line, flags=re.VERBOSE)

        # Try to match structure members which are functions
        if func_match:
            mod1 = func_match.group(1) or ''
            if func_match.group(2):
                mod1 += func_match.group(2)
            func_type = func_match.group(3)
            ptr1 = func_match.group(4)
            mod2 = func_match.group(5) or ''
            ptr2 = func_match.group(6)
            mod3 = func_match.group(7) or ''
            name = func_match.group(8)
            func_params = func_match.group(9)
            ptype = func_type
            if typefunc:
                ptype = typefunc(func_type, '<type>%s</type>' % func_type)
            pname = name
            if namefunc:
                pname = namefunc(name)

            if output_function_params:
                result[name] = '%s%s%s%s%s%s&#160;(*%s)&#160;(%s)' % (
                    mod1, ptype, ptr1, mod2, ptr2, mod3, pname, func_params)
            else:
                result[name] = '%s&#160;()' % pname

        # Try to match normal struct fields of comma-separated variables/
        elif vars_match:
            mod1 = vars_match.group(1) or ''
            if vars_match.group(2):
                mod1 += vars_match.group(2)
            vtype = vars_match.group(3)
            ptype = vtype
            if typefunc:
                ptype = typefunc(vtype, '<type>%s</type>' % vtype)
            mod2 = vars_match.group(4) or ''
            if mod2:
                mod2 = ' ' + mod2
            var_list = vars_match.group(5)

            logging.debug('"%s" "%s" "%s" "%s"', mod1, vtype, mod2, var_list)

            mod1 = mod1.replace(' ', '&#160;')
            mod2 = mod2.replace(' ', '&#160;')

            for n in var_list.split(','):
                # Each variable can have any number of '*' before the identifier,
                # and be followed by any number of pairs of brackets or a bit field specifier.
                # e.g. *foo, ***bar, *baz[12][23], foo : 25.
                field_match = re.search(
                    r'^\s* (\**(?:\s*restrict\b)?) \s* (\w+) \s* (?: ((?:\[[^\]]*\]\s*)+) | (:\s*\d+)?) \s* $',
                    n, flags=re.VERBOSE)
                if field_match:
                    ptrs = field_match.group(1)
                    name = field_match.group(2)
                    array = field_match.group(3) or ''
                    bits = field_match.group(4)
                    if bits:
                        bits = ' ' + bits
                    else:
                        bits = ''
                    # Keep 'restrict' separated from the name.
                    if ptrs and not ptrs.endswith('*'):
                        ptrs += ' '

                    array = array.replace(' ', '&#160;')
                    bits = bits.replace(' ', '&#160;')

                    pname = name
                    if namefunc:
                        pname = namefunc(name)

                    result[name] = '%s%s%s&#160;%s%s%s%s;' % (mod1, ptype, mod2, ptrs, pname, array, bits)

                    logging.debug('Matched line: %s%s%s %s%s%s%s', mod1, ptype, mod2, ptrs, pname, array, bits)
                else:
                    logging.warning('Cannot parse struct field: "%s"', n)

        else:
            logging.warning('Cannot parse structure field: "%s"', line)

    return result
def ParseEnumDeclaration(declaration):
    """Parse an enum declaration.

    This function takes an enumeration declaration and breaks it into
    individual enum member declarations. Private sections and comments are
    removed first.

    Args:
      declaration (str): the declaration to parse

    Returns:
      list: the names of the public enum members in declaration order; an
            empty tuple for forward or empty declarations

    Raises:
      ParseError: if the declaration does not begin with "typedef enum {" or
                  "enum [NAME] {"
    """
    spanning = re.MULTILINE | re.DOTALL

    # For forward struct declarations just return an empty array.
    if re.search(r'enum\s+\S+\s*;', declaration, flags=spanning):
        return ()

    declaration = PreprocessStructOrEnum(declaration)
    if not declaration.strip():
        return ()

    # Remove parenthesized expressions (in macros like GTK_BLAH = BLAH(1,3))
    # to avoid getting confused by commas they might contain. This doesn't
    # handle nested parentheses correctly.
    declaration = re.sub(r'\([^)\n]+\)', '', declaration)

    # Remove apostrophed characters (e.g. '}' or ',') values to avoid getting
    # confused with end of enumeration.
    # See https://bugzilla.gnome.org/show_bug.cgi?id=741305
    declaration = re.sub(r'\'.\'', '', declaration)

    # Remove comma from comma - possible whitespace - closing brace sequence
    # since it is legal in GNU C and C99 to have a trailing comma but doesn't
    # result in an actual enum member
    declaration = re.sub(r',(\s*})', r'\1', declaration)

    # Prime match after "typedef enum {" declaration
    start = re.search(r'(typedef\s+)?enum\s*(\S+\s*)?\{', declaration, flags=spanning)
    if start is None:
        raise ParseError('Enum declaration "%s" does not begin with "typedef enum {" or "enum [NAME] {"' % declaration)

    logging.debug("public fields in enum: %s'", declaration)

    members = []
    body = declaration[start.end():]

    # Treat lines in sequence.
    for entry in re.finditer(r'\s*([^,\}]+)([,\}])', body, flags=spanning):
        line, terminator = entry.groups()

        # ignore preprocessor directives
        line = re.sub(r'^#.*?\n\s*', '', line, flags=spanning)

        named = re.search(r'^(\w+)\s*(=.*)?$', line, flags=spanning)
        if named is None:
            # Special case for GIOCondition, where the values are specified by
            # macros which expand to include the equal sign like '=1'.
            named = re.search(r'^(\w+)\s*GLIB_SYSDEF_POLL', line, flags=spanning)

        if named is not None:
            members.append(named.group(1))
        elif line.strip().startswith('#'):
            # Special case include of <gdk/gdkcursors.h>, just ignore it
            # Special case for #ifdef/#else/#endif, just ignore it
            break
        else:
            logging.warning('Cannot parse enumeration member: %s', line)

        if terminator == '}':
            break

    return members
def ParseFunctionDeclaration(declaration, typefunc, namefunc):
    """Parse a function declaration.

    This function takes a function declaration and breaks it into individual
    parameter declarations. Each parameter has to be terminated by a comma
    or a newline. Parsing stops with a warning at the first parameter that
    cannot be matched.

    Args:
      declaration (str): the declaration to parse
      typefunc (func): function to apply to type, called as
                       typefunc(type, '<type>type</type>')
      namefunc (func): function to apply to name

    Returns:
      dict: map of (symbol, decl) pairs describing the prototype, in
            parameter order
    """
    # allow alphanumerics, '_', '[' & ']' in param names, try to match a standard parameter
    # groups: 1 = modifiers, 2 = type, 3 = pointer, 4 = name, 5 = array
    param_regex = r'^\s*((?:(?:G_CONST_RETURN|G_GNUC_[A-Z_]+\s+|unsigned long|unsigned short|signed long|signed short|unsigned|signed|long|short|volatile|const)\s+)*)((?:struct\b|enum\b)?\s*\w+)\s*((?:(?:const\b|restrict\b|G_GNUC_[A-Z_]+\b)?\s*\*?\s*(?:const\b|restrict\b|G_GNUC_[A-Z_]+\b)?\s*)*)(\w+)?\s*((?:\[\S*\])*)\s*(?:G_GNUC_[A-Z_]+)?\s*[,\n]'
    # parameters which are functions
    # groups: 1 = modifiers, 2 = struct, 3 = type, 4 = pointer, 5 = const,
    #         6 = function pointer stars, 7 = name, 8 = function parameters
    func_regex = r'^(const\s+|G_CONST_RETURN\s+|G_GNUC_[A-Z_]+\s+|signed\s+|unsigned\s+)*(struct\s+)?(\w+)\s*(\**)\s*(?:restrict\b)?\s*(const\s+)?\(\s*(\*[\s\*]*)\s*(\w+)\s*\)\s*\(([^)]*)\)\s*[,\n]'

    params = OrderedDict()
    count = 0

    while declaration:
        logging.debug('decl=[%s]', declaration)

        # skip whitespace and commas
        declaration, hits = re.subn(r'^[\s,]+', '', declaration)
        if hits:
            continue

        declaration, hits = re.subn(r'^void\s*[,\n]', '', declaration)
        if hits:
            if count != 0:
                logging.warning('void used as parameter %d in function %s', count, declaration)
            params['void'] = namefunc('<type>void</type>')
            count += 1
            continue

        declaration, hits = re.subn(r'^\s*[_a-zA-Z0-9]*\.\.\.\s*[,\n]', '', declaration)
        if hits:
            params['...'] = namefunc('...')
            count += 1
            continue

        found = re.match(param_regex, declaration)
        if found:
            # Consume the matched parameter.
            declaration = declaration[found.end():]

            pre, ptype, ptr, name, array = [group or '' for group in found.groups()]

            # Collapse whitespace runs; the pointer part also loses trailing
            # whitespace but keeps a separator after a trailing keyword.
            pre = re.sub(r'\s+', ' ', pre)
            ptype = re.sub(r'\s+', ' ', ptype)
            ptr = re.sub(r'\s+$', '', re.sub(r'\s+', ' ', ptr))
            if ptr and not ptr.endswith('*'):
                ptr += ' '

            logging.debug('"%s" "%s" "%s" "%s" "%s"', pre, ptype, ptr, name, array)

            # Without a name, a (un)signed modifier is taken as the type and
            # the matched type as the name.
            signedness = re.search(r'^((un)?signed .*)\s?', pre)
            if name == '' and signedness:
                name = ptype
                ptype = signedness.group(1)
                pre = ''

            if name == '':
                name = 'Param' + str(count + 1)

            logging.debug('"%s" "%s" "%s" "%s" "%s"', pre, ptype, ptr, name, array)

            xref = typefunc(ptype, '<type>%s</type>' % ptype)
            params[name] = namefunc('%s%s %s%s%s' % (pre, xref, ptr, name, array))
            count += 1
            continue

        # Try to match parameters which are functions
        found = re.match(func_regex, declaration)
        if found:
            declaration = declaration[found.end():]

            mod1 = found.group(1) or ''
            if found.group(2):
                mod1 += found.group(2)
            ptype = found.group(3)
            ptr1 = found.group(4)
            mod2 = found.group(5) or ''
            func_ptr = re.sub(r'\s+', ' ', found.group(6))
            name = found.group(7)
            func_params = found.group(8) or ''

            if ptr1 and not ptr1.endswith('*'):
                ptr1 += ' '

            logging.debug('"%s" "%s" "%s" "%s" "%s"', mod1, ptype, mod2, func_ptr, name)

            xref = typefunc(ptype, '<type>%s</type>' % ptype)
            params[name] = namefunc('%s%s%s%s (%s%s) (%s)' % (mod1, xref, ptr1, mod2, func_ptr, name, func_params))
            count += 1
            continue

        logging.warning('Cannnot parse args for function in "%s"', declaration)
        break

    return params
def ParseMacroDeclaration(declaration, namefunc):
    """Parse a macro declaration.

    This function takes a macro declaration and breaks it into individual
    parameter declarations. Any parameter ending in '...' is recorded as
    '...'. Macros without a parameter list yield an empty map.

    Args:
      declaration (str): the declaration to parse
      namefunc (func): function to apply to name

    Returns:
      dict: map of (symbol, decl) pairs describing the macro, in parameter
            order
    """
    result = OrderedDict()

    logging.debug('decl=[%s]', declaration)

    header = re.search(r'^\s*#\s*define\s+\w+\(([^\)]*)\)', declaration)
    if header is None:
        return result

    # The parameter list may span several lines.
    params = header.group(1).replace('\n', '')

    logging.debug('params=[%s]', params)

    for raw in params.split(','):
        param = raw.strip()
        if not param:
            continue

        # Allow varargs variations
        if param.endswith('...'):
            param = '...'

        result[param] = namefunc(param)

    return result