README: add some build instructions
[gtk-doc.git] / gtkdoc / common.py
blobbd119500e6e3a7e03acb503735745d9476baef8b
# -*- python -*-
#
# gtk-doc - GTK DocBook documentation generator.
# Copyright (C) 2001  Damon Chaplin
#               2007-2016  Stefan Sauer
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
22 from collections import OrderedDict
23 import logging
24 import os
25 import re
26 import subprocess
27 import sys
29 from . import config
def setup_logging():
    """Configure the root logger from the GTKDOC_TRACE environment variable.

    The variable names a standard logging level (DEBUG, INFO, WARNING, ERROR
    or CRITICAL, in any letter case). WARNING is used when the variable is
    unset, empty, or does not name a known level.

    Log records are written to stdout. As a side effect, stdout is re-opened
    with UTF-8 encoding unless PYTHONIOENCODING is set or stdout already
    reports a UTF-8 encoding.
    """
    level_name = os.environ.get('GTKDOC_TRACE') or 'WARNING'
    level = logging.getLevelName(level_name.upper())
    if not isinstance(level, int):
        # getLevelName() answers unknown names with a 'Level XYZ' string, which
        # basicConfig() would reject with a ValueError; fall back instead.
        level = logging.WARNING
    logging.basicConfig(stream=sys.stdout,
                        level=level,
                        format='%(asctime)s:%(filename)s:%(funcName)s:%(lineno)d:%(levelname)s:%(message)s')

    # When redirecting the output and running with a non utf-8 locale
    # we get UnicodeEncodeError:
    encoding = sys.stdout.encoding
    # Codec names are case-insensitive and may be spelled 'UTF-8', 'utf-8',
    # 'utf8' or 'utf_8', so normalise before comparing.
    is_utf8 = bool(encoding) and encoding.lower().replace('-', '').replace('_', '') == 'utf8'
    if 'PYTHONIOENCODING' not in os.environ and not is_utf8:
        sys.stdout.flush()
        sys.stdout = open(sys.stdout.fileno(), mode='w', encoding='utf8', buffering=1)
def UpdateFileIfChanged(old_file, new_file, make_backup):
    """Install new_file over old_file only when their contents differ.

    Leaving unchanged files untouched keeps their timestamps stable, so
    dependency tracking only sees files that really changed. In every case
    new_file no longer exists afterwards: it is either deleted (same content)
    or renamed to old_file.

    Args:
        old_file (str): The pathname of the old file.
        new_file (str): The pathname of the new version of the file.
        make_backup (bool): True if a backup of the old file should be kept.
                            It will have the .bak suffix added to the file name.

    Returns:
        bool: False if the file hasn't changed, and True if it has (or if
              old_file did not exist yet).
    """
    logging.debug("Comparing %s with %s...", old_file, new_file)

    # Nothing to compare against: the new file simply becomes the old one.
    if not os.path.exists(old_file):
        logging.debug("-> %s created.", old_file)
        os.rename(new_file, old_file)
        return True

    with open(old_file, 'rb') as previous:
        previous_bytes = previous.read()
    with open(new_file, 'rb') as candidate:
        candidate_bytes = candidate.read()

    if previous_bytes == candidate_bytes:
        os.unlink(new_file)
        logging.debug("-> content is the same.")
        return False

    # The target is removed (or moved aside) before the final rename.
    if not make_backup:
        os.unlink(old_file)
    else:
        backup_path = old_file + '.bak'
        if os.path.exists(backup_path):
            os.unlink(backup_path)
        os.rename(old_file, backup_path)
    logging.debug("-> content differs.")

    os.rename(new_file, old_file)
    return True
def GetModuleDocDir(module_name):
    """Get the docdir for the given module via pkg-config.

    Asks pkg-config for the module's 'prefix' variable and appends
    'share/gtk-doc/html' to it.

    Args:
        module_name (str): The module, e.g. 'glib-2.0'

    Returns:
        str: the doc directory, or None if pkg-config exits with an error
    """
    query = [config.pkg_config, '--variable=prefix', module_name]
    try:
        prefix = subprocess.check_output(query, universal_newlines=True)
    except subprocess.CalledProcessError:
        return None
    return os.path.join(prefix.strip(), 'share/gtk-doc/html')
def LogWarning(filename, line, message):
    """Print a warning in gcc style 'file:line: warning: message' format.

    Args:
        filename (str): The file the warning comes from; "unknown" is printed
                        when this is empty or None
        line (int): line number in the file
        message (str): the warning message to print
    """
    source = filename if filename else "unknown"

    # TODO: write to stderr
    print("%s:%d: warning: %s" % (source, line, message))
def CreateValidSGMLID(xml_id):
    """Creates a valid SGML 'id' from the given string.

    According to http://www.w3.org/TR/html4/types.html#type-id "ID and NAME
    tokens must begin with a letter ([A-Za-z]) and may be followed by any number
    of letters, digits ([0-9]), hyphens ("-"), underscores ("_"), colons (":"),
    and periods (".")."

    When creating SGML IDS, we append ":CAPS" to all all-caps identifiers to
    prevent name clashes (SGML ids are case-insensitive). (It basically never is
    the case that mixed-case identifiers would collide.)

    Args:
        xml_id (str): The text to be converted into a valid SGML id.

    Returns:
        str: The converted id.
    """
    # Special case, '_' would end up as '' so we use 'gettext-macro' instead.
    if xml_id == '_':
        return "gettext-macro"

    # One pass: drop ',' and ';', turn '_' and ' ' into '-'.
    cleaned = xml_id.translate(str.maketrans('_ ', '--', ',;'))
    cleaned = cleaned.lstrip('-')
    # '::' must be handled before ':' so it collapses to a single '-'.
    cleaned = cleaned.replace('::', '-').replace(':', '--')

    # Append ":CAPS" to all all-caps identifiers
    # FIXME: there are some inconsistencies here, we have index files containing e.g. TRUE--CAPS
    if not cleaned.isupper() or cleaned.endswith('-CAPS'):
        return cleaned
    return cleaned + ':CAPS'
164 # Parsing helpers (move to mkdb ?)
class ParseError(Exception):
    """Raised when a declaration does not have the expected shape."""
def PreprocessStructOrEnum(declaration):
    """Trim a type declaration for display.

    Removes private sections and comments from the declaration.

    Args:
        declaration (str): the type declaration (struct or enum)

    Returns:
        str: the trimmed declaration
    """
    spanning = re.MULTILINE | re.DOTALL

    # (pattern, replacement, flags), applied strictly in this order.
    substitutions = (
        # Remove private symbols: everything from a <private>/<protected>
        # marker up to the next <public> marker.
        # Assume end of declaration if line begins with '}'
        (r'\n?[ \t]*/\*\s*<\s*(private|protected)\s*>\s*\*/.*?(?:/\*\s*<\s*public\s*>\s*\*/|(?=^\}))',
         '', spanning),
        # Remove all other comments: block comments on lines of their own ...
        (r'\n\s*/\*.*?\*/\s*\n', r'\n', spanning),
        # ... block comments embedded in a line ...
        (r'/\*([^*]+|\*(?!/))*\*/', r' ', 0),
        # ... '//' comments on lines of their own ...
        (r'\n\s*//.*?\n', r'\n', spanning),
        # ... and trailing '//' comments.
        (r'//.*', '', 0),
    )

    trimmed = declaration
    for pattern, replacement, flags in substitutions:
        trimmed = re.sub(pattern, replacement, trimmed, flags=flags)
    return trimmed
# TODO: output_function_params is always passed as 0
# TODO: we always pass both functions
def ParseStructDeclaration(declaration, is_object, output_function_params, typefunc=None, namefunc=None):
    """Parse a struct declaration.

    Takes a structure declaration and breaks it into individual type declarations.

    Args:
        declaration (str): the declaration to parse
        is_object (bool): true if this is an object structure
        output_function_params (bool): true if full type is wanted for function pointer members
        typefunc (func): function to apply to type; called as typefunc(type, '<type>type</type>')
        namefunc (func): function to apply to name

    Returns:
        dict: map of (symbol, decl) pairs describing the public declaration,
              in declaration order; empty for forward declarations

    Raises:
        ParseError: if no "struct/union [NAME] {" opener is found
    """
    spanning = re.MULTILINE | re.DOTALL

    # A line consisting only of the closing brace (plus optional typedef name).
    closing_brace_re = r'^\s*\}\s*\w*\s*$'

    # Structure members which are function pointers.
    func_member_re = r'''^
        (const\s+|G_CONST_RETURN\s+|unsigned\s+|signed\s+|long\s+|short\s+)*(struct\s+|enum\s+)? # mod1
        (\w+)\s* # type
        (\**(?:\s*restrict)?)\s* # ptr1
        (const\s+)? # mod2
        (\**\s*) # ptr2
        (const\s+)? # mod3
        \(\s*\*\s*(\w+)\s*\)\s* # name
        \(([^)]*)\)\s* # func_params
        $'''

    # Normal struct fields: a type followed by comma-separated variables.
    var_member_re = r'''^
        ((?:const\s+|volatile\s+|unsigned\s+|signed\s+|short\s+|long\s+)?)(struct\s+|enum\s+)? # mod1
        (\w+)\s* # type
        (\** \s* const\s+)? # mod2
        (.*) # variables
        $'''

    # Each variable can have any number of '*' before the identifier,
    # and be followed by any number of pairs of brackets or a bit field specifier.
    # e.g. *foo, ***bar, *baz[12][23], foo : 25.
    var_name_re = r'^\s* (\**(?:\s*restrict\b)?) \s* (\w+) \s* (?: ((?:\[[^\]]*\]\s*)+) | (:\s*\d+)?) \s* $'

    # For forward struct declarations just return an empty array.
    if re.search(r'(?:struct|union)\s+\S+\s*;', declaration, flags=spanning):
        return {}

    # Remove all private parts of the declaration
    # For objects, assume private
    # NOTE(review): the lazy '.*?' skip between the opening brace and the
    # <public> marker is assumed here - confirm against the upstream file.
    if is_object:
        declaration = re.sub(r'''((?:struct|union)\s+\w*\s*\{)
                                 .*?
                                 (?:/\*\s*<\s*public\s*>\s*\*/|(?=\}))''',
                             r'\1', declaration, flags=spanning | re.VERBOSE)

    # Remove g_iface, parent_instance and parent_class if they are first member
    declaration = re.sub(r'(\{)\s*(\w)+\s+(g_iface|parent_instance|parent_class)\s*;', r'\1', declaration)

    declaration = PreprocessStructOrEnum(declaration)

    if not declaration.strip():
        return {}

    # Prime match after "struct/union {" declaration
    opener = re.search(r'(?:struct|union)\s+\w*\s*\{', declaration, flags=spanning)
    if opener is None:
        raise ParseError('Declaration "%s" does not begin with "struct/union [NAME] {"' % declaration)

    logging.debug('public fields in struct/union: %s', declaration)

    members = OrderedDict()
    body = declaration[opener.end():]

    # Treat lines in sequence, allowing singly nested anonymous structs and unions.
    for statement in re.finditer(r'\s*([^{;]+(\{[^\}]*\}[^{;]+)?);', body, flags=spanning):
        line = statement.group(1)

        logging.debug('checking "%s"', line)

        if re.search(closing_brace_re, line):
            break

        # FIXME: Just ignore nested structs and unions for now
        if '{' in line:
            continue

        # ignore preprocessor directives
        line = re.sub(r'^#.*?\n\s*', '', line, flags=spanning)

        # Stripping a directive may have exposed the closing brace.
        if re.search(closing_brace_re, line):
            break

        # Try to match structure members which are functions
        as_func = re.search(func_member_re, line, flags=re.VERBOSE)
        if as_func:
            mod1, kind, func_type, ptr1, mod2, ptr2, mod3, name, func_params = as_func.groups()
            mod1 = (mod1 or '') + (kind or '')
            mod2 = mod2 or ''
            mod3 = mod3 or ''

            ptype = typefunc(func_type, '<type>%s</type>' % func_type) if typefunc else func_type
            pname = namefunc(name) if namefunc else name

            if output_function_params:
                members[name] = '%s%s%s%s%s%s&#160;(*%s)&#160;(%s)' % (
                    mod1, ptype, ptr1, mod2, ptr2, mod3, pname, func_params)
            else:
                members[name] = '%s&#160;()' % pname
            continue

        # Try to match normal struct fields of comma-separated variables/
        as_vars = re.search(var_member_re, line, flags=re.VERBOSE)
        if not as_vars:
            logging.warning('Cannot parse structure field: "%s"', line)
            continue

        mod1, kind, vtype, mod2, var_list = as_vars.groups()
        mod1 = (mod1 or '') + (kind or '')
        ptype = typefunc(vtype, '<type>%s</type>' % vtype) if typefunc else vtype
        mod2 = ' ' + mod2 if mod2 else ''

        logging.debug('"%s" "%s" "%s" "%s"', mod1, vtype, mod2, var_list)

        # Non-breaking spaces keep each rendered declaration on one line.
        mod1 = mod1.replace(' ', '&#160;')
        mod2 = mod2.replace(' ', '&#160;')

        for variable in var_list.split(','):
            field = re.search(var_name_re, variable, flags=re.VERBOSE)
            if not field:
                logging.warning('Cannot parse struct field: "%s"', variable)
                continue

            ptrs, name, array, bits = field.groups()
            array = array or ''
            bits = ' ' + bits if bits else ''
            if ptrs and not ptrs.endswith('*'):
                ptrs += ' '

            array = array.replace(' ', '&#160;')
            bits = bits.replace(' ', '&#160;')

            pname = namefunc(name) if namefunc else name

            members[name] = '%s%s%s&#160;%s%s%s%s;' % (mod1, ptype, mod2, ptrs, pname, array, bits)

            logging.debug('Matched line: %s%s%s %s%s%s%s', mod1, ptype, mod2, ptrs, pname, array, bits)

    return members
def ParseEnumDeclaration(declaration):
    """Parse an enum declaration.

    This function takes a enumeration declaration and breaks it into individual
    enum member declarations.

    Args:
        declaration (str): the declaration to parse

    Returns:
        list of str: the names of the public enum members (an empty tuple for
                     forward or empty declarations)

    Raises:
        ParseError: if no "typedef enum {" / "enum [NAME] {" opener is found
    """
    spanning = re.MULTILINE | re.DOTALL

    # For forward struct declarations just return an empty array.
    if re.search(r'enum\s+\S+\s*;', declaration, flags=spanning):
        return ()

    declaration = PreprocessStructOrEnum(declaration)

    if not declaration.strip():
        return ()

    cleanups = (
        # Remove parenthesized expressions (in macros like GTK_BLAH = BLAH(1,3))
        # to avoid getting confused by commas they might contain. This doesn't
        # handle nested parentheses correctly.
        (r'\([^)\n]+\)', ''),
        # Remove apostrophed characters (e.g. '}' or ',') values to avoid getting
        # confused with end of enumeration.
        # See https://bugzilla.gnome.org/show_bug.cgi?id=741305
        (r'\'.\'', ''),
        # Remove comma from comma - possible whitespace - closing brace sequence
        # since it is legal in GNU C and C99 to have a trailing comma but doesn't
        # result in an actual enum member
        (r',(\s*})', r'\1'),
    )
    for pattern, replacement in cleanups:
        declaration = re.sub(pattern, replacement, declaration)

    # Prime match after "typedef enum {" declaration
    opener = re.search(r'(typedef\s+)?enum\s*(\S+\s*)?\{', declaration, flags=spanning)
    if opener is None:
        raise ParseError('Enum declaration "%s" does not begin with "typedef enum {" or "enum [NAME] {"' % declaration)

    logging.debug("public fields in enum: %s'", declaration)

    members = []

    # Treat lines in sequence.
    for chunk in re.finditer(r'\s*([^,\}]+)([,\}])', declaration[opener.end():], flags=spanning):
        line, terminator = chunk.groups()

        # ignore preprocessor directives
        line = re.sub(r'^#.*?\n\s*', '', line, flags=spanning)

        # The second pattern is a special case for GIOCondition, where the
        # values are specified by macros which expand to include the equal
        # sign like '=1'.
        found = (re.search(r'^(\w+)\s*(=.*)?$', line, flags=spanning)
                 or re.search(r'^(\w+)\s*GLIB_SYSDEF_POLL', line, flags=spanning))
        if found:
            members.append(found.group(1))
        elif line.strip().startswith('#'):
            # Special case include of <gdk/gdkcursors.h>, just ignore it
            # Special case for #ifdef/#else/#endif, just ignore it
            break
        else:
            logging.warning('Cannot parse enumeration member: %s', line)

        if terminator == '}':
            break

    return members
def ParseFunctionDeclaration(declaration, typefunc, namefunc):
    """Parse a function declaration.

    This function takes a function declaration and breaks it into individual
    parameter declarations. Every parameter has to be terminated by a comma
    or a newline to be recognised; parsing stops with a warning at the first
    piece of text that matches none of the known parameter forms.

    Args:
        declaration (str): the declaration to parse
        typefunc (func): function to apply to type; called as typefunc(type, '<type>type</type>')
        namefunc (func): function to apply to the rendered parameter text

    Returns:
        dict: map of (symbol, decl) pairs describing the prototype, in
              declaration order
    """
    # allow alphanumerics, '_', '[' & ']' in param names, try to match a standard parameter
    # groups: pre, type, ptr, name, array
    plain_param_re = r'^\s*((?:(?:G_CONST_RETURN|G_GNUC_[A-Z_]+\s+|unsigned long|unsigned short|signed long|signed short|unsigned|signed|long|short|volatile|const)\s+)*)((?:struct\b|enum\b)?\s*\w+)\s*((?:(?:const\b|restrict\b|G_GNUC_[A-Z_]+\b)?\s*\*?\s*(?:const\b|restrict\b|G_GNUC_[A-Z_]+\b)?\s*)*)(\w+)?\s*((?:\[\S*\])*)\s*(?:G_GNUC_[A-Z_]+)?\s*[,\n]'

    # Try to match parameters which are functions
    # groups: mod1, struct, type, ptr1, mod2, func_ptr, name, func_params
    func_param_re = r'^(const\s+|G_CONST_RETURN\s+|G_GNUC_[A-Z_]+\s+|signed\s+|unsigned\s+)*(struct\s+)?(\w+)\s*(\**)\s*(?:restrict\b)?\s*(const\s+)?\(\s*(\*[\s\*]*)\s*(\w+)\s*\)\s*\(([^)]*)\)\s*[,\n]'

    params = OrderedDict()
    index = 0
    remaining = declaration

    while remaining:
        logging.debug('decl=[%s]', remaining)

        # skip whitespace and commas
        remaining, hits = re.subn(r'^[\s,]+', '', remaining)
        if hits:
            continue

        remaining, hits = re.subn(r'^void\s*[,\n]', '', remaining)
        if hits:
            if index != 0:
                logging.warning('void used as parameter %d in function %s', index, remaining)
            params['void'] = namefunc('<type>void</type>')
            index += 1
            continue

        remaining, hits = re.subn(r'^\s*[_a-zA-Z0-9]*\.\.\.\s*[,\n]', '', remaining)
        if hits:
            params['...'] = namefunc('...')
            index += 1
            continue

        plain = re.match(plain_param_re, remaining)
        if plain:
            # The pattern is anchored at the start, so dropping the matched
            # prefix consumes exactly this parameter.
            remaining = remaining[plain.end():]

            pre, ctype, ptr, name, array = (group or '' for group in plain.groups())

            pre = re.sub(r'\s+', ' ', pre)
            ctype = re.sub(r'\s+', ' ', ctype)
            ptr = re.sub(r'\s+', ' ', ptr).rstrip(' ')
            if ptr and not ptr.endswith('*'):
                ptr += ' '

            logging.debug('"%s" "%s" "%s" "%s" "%s"', pre, ctype, ptr, name, array)

            # With no name but a (un)signed prefix, the word matched as the
            # type is really the name and the prefix is the type.
            sign_prefix = re.search(r'^((un)?signed .*)\s?', pre)
            if name == '' and sign_prefix:
                name = ctype
                ctype = sign_prefix.group(1)
                pre = ''

            if name == '':
                name = 'Param' + str(index + 1)

            logging.debug('"%s" "%s" "%s" "%s" "%s"', pre, ctype, ptr, name, array)

            xref = typefunc(ctype, '<type>%s</type>' % ctype)
            params[name] = namefunc('%s%s %s%s%s' % (pre, xref, ptr, name, array))
            index += 1
            continue

        func = re.match(func_param_re, remaining)
        if func:
            remaining = remaining[func.end():]

            mod1 = (func.group(1) or '') + (func.group(2) or '')
            ctype = func.group(3)
            ptr1 = func.group(4)
            mod2 = func.group(5) or ''
            func_ptr = re.sub(r'\s+', ' ', func.group(6))
            name = func.group(7)
            func_params = func.group(8) or ''

            if ptr1 and not ptr1.endswith('*'):
                ptr1 += ' '

            logging.debug('"%s" "%s" "%s" "%s" "%s"', mod1, ctype, mod2, func_ptr, name)

            xref = typefunc(ctype, '<type>%s</type>' % ctype)
            params[name] = namefunc('%s%s%s%s (%s%s) (%s)' % (mod1, xref, ptr1, mod2, func_ptr, name, func_params))
            index += 1
            continue

        logging.warning('Cannnot parse args for function in "%s"', remaining)
        break

    return params
def ParseMacroDeclaration(declaration, namefunc):
    """Parse a macro declaration.

    This function takes a macro declaration and breaks it into individual
    parameter declarations. Only function-like macros ("#define NAME(...)")
    yield parameters; anything else gives an empty result.

    Args:
        declaration (str): the declaration to parse
        namefunc (func): function to apply to name

    Returns:
        dict: map of (symbol, decl) pairs describing the macro, in
              declaration order
    """
    params = OrderedDict()

    logging.debug('decl=[%s]', declaration)

    header = re.search(r'^\s*#\s*define\s+\w+\(([^\)]*)\)', declaration)
    if header is None:
        return params

    # The parameter list may be wrapped over several lines.
    arg_text = header.group(1).replace('\n', '')

    logging.debug('params=[%s]', arg_text)

    for raw_arg in arg_text.split(','):
        arg = raw_arg.strip()

        # Allow varargs variations
        if arg.endswith('...'):
            arg = '...'

        if arg:
            params[arg] = namefunc(arg)

    return params