gtkdoc/common.py

   1 # -*- python -*-
   2 #
   3 # gtk-doc - GTK DocBook documentation generator.
   4 # Copyright (C) 2001  Damon Chaplin
   5 #               2007-2016  Stefan Sauer
   6 #
   7 # This program is free software; you can redistribute it and/or modify
   8 # it under the terms of the GNU General Public License as published by
   9 # the Free Software Foundation; either version 2 of the License, or
  10 # (at your option) any later version.
  11 #
  12 # This program is distributed in the hope that it will be useful,
  13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 # GNU General Public License for more details.
  16 #
  17 # You should have received a copy of the GNU General Public License
  18 # along with this program; if not, write to the Free Software
  19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  20 #
  21
  22 # Support both Python 2 and 3
  23 from __future__ import print_function
  24
  25 from collections import OrderedDict
  26 import logging
  27 import os
  28 import re
  29 import subprocess
  30 import sys
  31 import six
  32 import codecs
  33
  34 from . import config
  35
  36
  37 def open_text(filename, mode="r", encoding="utf-8"):
  38     """An open() which removes some differences between Python 2 and 3 and
  39     has saner defaults.
  40
  41     Unlike the builtin open by default utf-8 is use and not the locale
  42     encoding (which is ANSI on Windows for example, not very helpful)
  43
  44     For Python 2, files are opened in text mode like with Python 3.
  45     """
  46
  47     if mode not in ("r", "w"):
  48         raise ValueError("mode %r not supported, must be 'r' or 'w'" % mode)
  49
  50     if six.PY3:
  51         return open(filename, mode, encoding=encoding)
  52     else:
  53         # We can't use io.open() here as its write method is too strict and
  54         # only allows unicode instances and not everything in the codebase
  55         # forces unicode at the moment. codecs.open() on the other hand
  56         # happily takes ASCII str and decodes it.
  57         return codecs.open(filename, mode, encoding=encoding)
  58
  59
  60 def setup_logging():
  61     """Check GTKDOC_TRACE environment variable.
  62
  63     Set python log level to the value of the environment variable (DEBUG, INFO,
  64     WARNING, ERROR and CRITICAL) or INFO if the environment variable is empty.
  65     """
  66     log_level = os.environ.get('GTKDOC_TRACE')
  67     if log_level == '':
  68         log_level = 'INFO'
  69     if log_level:
  70         logging.basicConfig(stream=sys.stdout,
  71                             level=logging.getLevelName(log_level.upper()),
  72                             format='%(asctime)s:%(filename)s:%(funcName)s:%(lineno)d:%(levelname)s:%(message)s')
  73     # When redirecting the output on python2 we get UnicodeEncodeError:
  74     if not sys.stdout.encoding:
  75         import codecs
  76         sys.stdout = codecs.getwriter('utf8')(sys.stdout)
  77
  78
  79 def UpdateFileIfChanged(old_file, new_file, make_backup):
  80     """Compares the old version of the file with the new version and if the
  81     file has changed it moves the new version into the old versions place. This
  82     is used so we only change files if needed, so we can do proper dependency
  83     tracking.
  84
  85     Args:
  86         old_file (str): The pathname of the old file.
  87         new_file (str): The pathname of the new version of the file.
  88         make_backup (bool): True if a backup of the old file should be kept.
  89                            It will have the .bak suffix added to the file name.
  90
  91     Returns:
  92         bool: It returns False if the file hasn't changed, and True if it has.
  93     """
  94
  95     logging.debug("Comparing %s with %s...", old_file, new_file)
  96
  97     if os.path.exists(old_file):
  98         old_contents = open(old_file, 'rb').read()
  99         new_contents = open(new_file, 'rb').read()
 100         if old_contents == new_contents:
 101             os.unlink(new_file)
 102             logging.debug("-> content is the same.")
 103             return False
 104
 105         if make_backup:
 106             backupname = old_file + '.bak'
 107             if os.path.exists(backupname):
 108                 os.unlink(backupname)
 109             os.rename(old_file, backupname)
 110         else:
 111             os.unlink(old_file)
 112         logging.debug("-> content differs.")
 113     else:
 114         logging.debug("-> %s created.", old_file)
 115
 116     os.rename(new_file, old_file)
 117     return True
 118
 119
 120 def GetModuleDocDir(module_name):
 121     """Get the docdir for the given module via pkg-config
 122
 123     Args:
 124       module_name (string): The module, e.g. 'glib-2.0'
 125
 126     Returns:
 127       str: the doc directory or None
 128     """
 129     path = None
 130     try:
 131         path = subprocess.check_output([config.pkg_config, '--variable=prefix', module_name], universal_newlines=True)
 132     except subprocess.CalledProcessError:
 133         return None
 134     return os.path.join(path.strip(), 'share/gtk-doc/html')
 135
 136
 137 def LogWarning(filename, line, message):
 138     """Log a warning in gcc style format
 139
 140     Args:
 141       file (str): The file the error comes from
 142       line (int): line number in the file
 143       message (str): the error message to print
 144     """
 145     filename = filename or "unknown"
 146
 147     # TODO: write to stderr
 148     print ("%s:%d: warning: %s" % (filename, line, message))
 149
 150
 151 def CreateValidSGMLID(xml_id):
 152     """Creates a valid SGML 'id' from the given string.
 153
 154     According to http://www.w3.org/TR/html4/types.html#type-id "ID and NAME
 155     tokens must begin with a letter ([A-Za-z]) and may be followed by any number
 156     of letters, digits ([0-9]), hyphens ("-"), underscores ("_"), colons (":"),
 157     and periods (".")."
 158
 159     When creating SGML IDS, we append ":CAPS" to all all-caps identifiers to
 160     prevent name clashes (SGML ids are case-insensitive). (It basically never is
 161     the case that mixed-case identifiers would collide.)
 162
 163     Args:
 164       id (str): The text to be converted into a valid SGML id.
 165
 166     Returns:
 167       str: The converted id.
 168     """
 169
 170     # Special case, '_' would end up as '' so we use 'gettext-macro' instead.
 171     if xml_id == '_':
 172         return "gettext-macro"
 173
 174     xml_id = re.sub(r'[,;]', '', xml_id)
 175     xml_id = re.sub(r'[_ ]', '-', xml_id)
 176     xml_id = re.sub(r'^-+', '', xml_id)
 177     xml_id = xml_id.replace('::', '-')
 178     xml_id = xml_id.replace(':', '--')
 179
 180     # Append ":CAPS" to all all-caps identifiers
 181     # FIXME: there are some inconsistencies here, we have index files containing e.g. TRUE--CAPS
 182     if xml_id.isupper() and not xml_id.endswith('-CAPS'):
 183         xml_id += ':CAPS'
 184
 185     return xml_id
 186
 187
 188 # Parsing helpers (move to mkdb ?)
 189
 190 class ParseError(Exception):
 191     pass
 192
 193
 194 def PreprocessStructOrEnum(declaration):
 195     """Trim a type declaration for display.
 196
 197     Removes private sections and comments from the declaration.
 198
 199     Args:
 200       declaration (str): the type declaration (struct or enum)
 201
 202     Returns:
 203       str: the trimmed declaration
 204     """
 205     # Remove private symbols
 206     # Assume end of declaration if line begins with '}'
 207     declaration = re.sub(r'\n?[ \t]*/\*\s*<\s*(private|protected)\s*>\s*\*/.*?(?:/\*\s*<\s*public\s*>\s*\*/|(?=^\}))',
 208                          '', declaration, flags=re.MULTILINE | re.DOTALL)
 209
 210     # Remove all other comments
 211     declaration = re.sub(r'\n\s*/\*.*?\*/\s*\n', r'\n', declaration, flags=re.MULTILINE | re.DOTALL)
 212     declaration = re.sub(r'/\*([^*]+|\*(?!/))*\*/', r' ', declaration)
 213     declaration = re.sub(r'\n\s*//.*?\n', r'\n', declaration, flags=re.MULTILINE | re.DOTALL)
 214     declaration = re.sub(r'//.*', '', declaration)
 215
 216     return declaration
 217
 218
 219 # TODO: output_function_params is always passed as 0
 220 # TODO: we always pass both functions
 221 def ParseStructDeclaration(declaration, is_object, output_function_params, typefunc=None, namefunc=None):
 222     """ Parse a struct declaration.
 223
 224     Takes a structure declaration and breaks it into individual type declarations.
 225
 226     Args:
 227       declaration (str): the declaration to parse
 228       is_object (bool): true if this is an object structure
 229       output_function_params (bool): true if full type is wanted for function pointer members
 230       typefunc (func): function to apply to type
 231       namefunc (func): function to apply to name
 232
 233     Returns:
 234       dict: map of (symbol, decl) pairs describing the public declaration
 235     """
 236
 237     # For forward struct declarations just return an empty array.
 238     if re.search(r'(?:struct|union)\s+\S+\s*;', declaration, flags=re.MULTILINE | re.DOTALL):
 239         return {}
 240
 241     # Remove all private parts of the declaration
 242     # For objects, assume private
 243     if is_object:
 244         declaration = re.sub(r'''((?:struct|union)\s+\w*\s*\{)
 245                                  .*?
 246                                  (?:/\*\s*<\s*public\s*>\s*\*/|(?=\}))''',
 247                              r'\1', declaration, flags=re.MULTILINE | re.DOTALL | re.VERBOSE)
 248
 249     # Remove g_iface, parent_instance and parent_class if they are first member
 250     declaration = re.sub(r'(\{)\s*(\w)+\s+(g_iface|parent_instance|parent_class)\s*;', r'\1', declaration)
 251
 252     declaration = PreprocessStructOrEnum(declaration)
 253
 254     if declaration.strip() == '':
 255         return {}
 256
 257     # Prime match after "struct/union {" declaration
 258     match = re.search(r'(?:struct|union)\s+\w*\s*\{', declaration, flags=re.MULTILINE | re.DOTALL)
 259     if not match:
 260         raise ParseError('Declaration "%s" does not begin with "struct/union [NAME] {"' % declaration)
 261
 262     logging.debug('public fields in struct/union: %s', declaration)
 263
 264     result = OrderedDict()
 265
 266     # Treat lines in sequence, allowing singly nested anonymous structs and unions.
 267     for m in re.finditer(r'\s*([^{;]+(\{[^\}]*\}[^{;]+)?);', declaration[match.end():], flags=re.MULTILINE | re.DOTALL):
 268         line = m.group(1)
 269
 270         logging.debug('checking "%s"', line)
 271
 272         if re.search(r'^\s*\}\s*\w*\s*$', line):
 273             break
 274
 275         # FIXME: Just ignore nested structs and unions for now
 276         if '{' in line:
 277             continue
 278
 279         # ignore preprocessor directives
 280         line = re.sub(r'^#.*?\n\s*', '', line, flags=re.MULTILINE | re.DOTALL)
 281
 282         if re.search(r'^\s*\}\s*\w*\s*$', line):
 283             break
 284
 285         func_match = re.search(r'''^
 286                                    (const\s+|G_CONST_RETURN\s+|unsigned\s+|signed\s+|long\s+|short\s+)*(struct\s+|enum\s+)?  # mod1
 287                                    (\w+)\s*                             # type
 288                                    (\**(?:\s*restrict)?)\s*             # ptr1
 289                                    (const\s+)?                          # mod2
 290                                    (\**\s*)                             # ptr2
 291                                    (const\s+)?                          # mod3
 292                                    \(\s*\*\s*(\w+)\s*\)\s*              # name
 293                                    \(([^)]*)\)\s*                       # func_params
 294                                    $''', line, flags=re.VERBOSE)
 295         vars_match = re.search(r'''^
 296                                    ((?:const\s+|volatile\s+|unsigned\s+|signed\s+|short\s+|long\s+)?)(struct\s+|enum\s+)? # mod1
 297                                    (\w+)\s*                            # type
 298                                    (\** \s* const\s+)?                 # mod2
 299                                    (.*)                                # variables
 300                                    $''', line, flags=re.VERBOSE)
 301
 302         # Try to match structure members which are functions
 303         if func_match:
 304             mod1 = func_match.group(1) or ''
 305             if func_match.group(2):
 306                 mod1 += func_match.group(2)
 307             func_type = func_match.group(3)
 308             ptr1 = func_match.group(4)
 309             mod2 = func_match.group(5) or ''
 310             ptr2 = func_match.group(6)
 311             mod3 = func_match.group(7) or ''
 312             name = func_match.group(8)
 313             func_params = func_match.group(9)
 314             ptype = func_type
 315             if typefunc:
 316                 ptype = typefunc(func_type, '<type>%s</type>' % func_type)
 317             pname = name
 318             if namefunc:
 319                 pname = namefunc(name)
 320
 321             if output_function_params:
 322                 result[name] = '%s%s%s%s%s%s&#160;(*%s)&#160;(%s)' % (
 323                     mod1, ptype, ptr1, mod2, ptr2, mod3, pname, func_params)
 324             else:
 325                 result[name] = '%s&#160;()' % pname
 326
 327         # Try to match normal struct fields of comma-separated variables/
 328         elif vars_match:
 329             mod1 = vars_match.group(1) or ''
 330             if vars_match.group(2):
 331                 mod1 += vars_match.group(2)
 332             vtype = vars_match.group(3)
 333             ptype = vtype
 334             if typefunc:
 335                 ptype = typefunc(vtype, '<type>%s</type>' % vtype)
 336             mod2 = vars_match.group(4) or ''
 337             if mod2:
 338                 mod2 = ' ' + mod2
 339             var_list = vars_match.group(5)
 340
 341             logging.debug('"%s" "%s" "%s" "%s"', mod1, vtype, mod2, var_list)
 342
 343             mod1 = mod1.replace(' ', '&#160;')
 344             mod2 = mod2.replace(' ', '&#160;')
 345
 346             for n in var_list.split(','):
 347                 # Each variable can have any number of '*' before the identifier,
 348                 # and be followed by any number of pairs of brackets or a bit field specifier.
 349                 # e.g. *foo, ***bar, *baz[12][23], foo : 25.
 350                 m = re.search(
 351                     r'^\s* (\**(?:\s*restrict\b)?) \s* (\w+) \s* (?: ((?:\[[^\]]*\]\s*)+) | (:\s*\d+)?) \s* $',
 352                     n, flags=re.VERBOSE)
 353                 if m:
 354                     ptrs = m.group(1)
 355                     name = m.group(2)
 356                     array = m.group(3) or ''
 357                     bits = m.group(4)
 358                     if bits:
 359                         bits = ' ' + bits
 360                     else:
 361                         bits = ''
 362                     if ptrs and not ptrs.endswith('*'):
 363                         ptrs += ' '
 364
 365                     array = array.replace(' ', '&#160;')
 366                     bits = bits.replace(' ', '&#160;')
 367
 368                     pname = name
 369                     if namefunc:
 370                         pname = namefunc(name)
 371
 372                     result[name] = '%s%s%s&#160;%s%s%s%s;' % (mod1, ptype, mod2, ptrs, pname, array, bits)
 373
 374                     logging.debug('Matched line: %s%s%s %s%s%s%s', mod1, ptype, mod2, ptrs, pname, array, bits)
 375                 else:
 376                     logging.warning('Cannot parse struct field: "%s"', n)
 377
 378         else:
 379             logging.warning('Cannot parse structure field: "%s"', line)
 380
 381     return result
 382
 383
 384 def ParseEnumDeclaration(declaration):
 385     """Parse an enum declaration.
 386
 387     This function takes a enumeration declaration and breaks it into individual
 388     enum member declarations.
 389
 390     Args:
 391       declaration (str): the declaration to parse
 392
 393     Returns:
 394       str: list of strings describing the public declaration
 395     """
 396
 397     # For forward struct declarations just return an empty array.
 398     if re.search(r'enum\s+\S+\s*;', declaration, flags=re.MULTILINE | re.DOTALL):
 399         return ()
 400
 401     declaration = PreprocessStructOrEnum(declaration)
 402
 403     if declaration.strip() == '':
 404         return ()
 405
 406     result = []
 407
 408     # Remove parenthesized expressions (in macros like GTK_BLAH = BLAH(1,3))
 409     # to avoid getting confused by commas they might contain. This doesn't
 410     # handle nested parentheses correctly.
 411     declaration = re.sub(r'\([^)\n]+\)', '', declaration)
 412
 413     # Remove apostrophed characters (e.g. '}' or ',') values to avoid getting
 414     # confused with end of enumeration.
 415     # See https://bugzilla.gnome.org/show_bug.cgi?id=741305
 416     declaration = re.sub(r'\'.\'', '', declaration)
 417
 418     # Remove comma from comma - possible whitespace - closing brace sequence
 419     # since it is legal in GNU C and C99 to have a trailing comma but doesn't
 420     # result in an actual enum member
 421     declaration = re.sub(r',(\s*})', r'\1', declaration)
 422
 423     # Prime match after "typedef enum {" declaration
 424     match = re.search(r'(typedef\s+)?enum\s*(\S+\s*)?\{', declaration, flags=re.MULTILINE | re.DOTALL)
 425     if not match:
 426         raise ParseError('Enum declaration "%s" does not begin with "typedef enum {" or "enum [NAME] {"' % declaration)
 427
 428     logging.debug("public fields in enum: %s'", declaration)
 429
 430     # Treat lines in sequence.
 431     for m in re.finditer(r'\s*([^,\}]+)([,\}])', declaration[match.end():], flags=re.MULTILINE | re.DOTALL):
 432         line = m.group(1)
 433         terminator = m.group(2)
 434
 435         # ignore preprocessor directives
 436         line = re.sub(r'^#.*?\n\s*', '', line, flags=re.MULTILINE | re.DOTALL)
 437
 438         m1 = re.search(r'^(\w+)\s*(=.*)?$', line, flags=re.MULTILINE | re.DOTALL)
 439         # Special case for GIOCondition, where the values are specified by
 440         # macros which expand to include the equal sign like '=1'.
 441         m2 = re.search(r'^(\w+)\s*GLIB_SYSDEF_POLL', line, flags=re.MULTILINE | re.DOTALL)
 442         if m1:
 443             result.append(m1.group(1))
 444         elif m2:
 445             result.append(m2.group(1))
 446         elif line.strip().startswith('#'):
 447             # Special case include of <gdk/gdkcursors.h>, just ignore it
 448             # Special case for #ifdef/#else/#endif, just ignore it
 449             break
 450         else:
 451             logging.warning('Cannot parse enumeration member: %s', line)
 452
 453         if terminator == '}':
 454             break
 455
 456     return result
 457
 458
 459 def ParseFunctionDeclaration(declaration, typefunc, namefunc):
 460     """Parse a function declaration.
 461
 462     This function takes a function declaration and breaks it into individual
 463     parameter declarations.
 464
 465     Args:
 466       declaration (str): the declaration to parse
 467       typefunc (func): function to apply to type
 468       namefunc (func): function to apply to name
 469
 470     Returns:
 471       dict: map of (symbol, decl) pairs describing the prototype
 472     """
 473
 474     result = OrderedDict()
 475
 476     param_num = 0
 477     while declaration:
 478         logging.debug('decl=[%s]', declaration)
 479
 480         # skip whitespace and commas
 481         declaration, n = re.subn(r'^[\s,]+', '', declaration)
 482         if n:
 483             continue
 484
 485         declaration, n = re.subn(r'^void\s*[,\n]', '', declaration)
 486         if n:
 487             if param_num != 0:
 488                 logging.warning('void used as parameter %d in function %s', param_num, declaration)
 489             result['void'] = namefunc('<type>void</type>')
 490             param_num += 1
 491             continue
 492
 493         declaration, n = re.subn(r'^\s*[_a-zA-Z0-9]*\.\.\.\s*[,\n]', '', declaration)
 494         if n:
 495             result['...'] = namefunc('...')
 496             param_num += 1
 497             continue
 498
 499         # allow alphanumerics, '_', '[' & ']' in param names, try to match a standard parameter
 500         #              $1                                                                                                                                            $2                             $3                                                                                                $4       $5
 501         regex = r'^\s*((?:(?:G_CONST_RETURN|G_GNUC_[A-Z_]+\s+|unsigned long|unsigned short|signed long|signed short|unsigned|signed|long|short|volatile|const)\s+)*)((?:struct\b|enum\b)?\s*\w+)\s*((?:(?:const\b|restrict\b|G_GNUC_[A-Z_]+\b)?\s*\*?\s*(?:const\b|restrict\b|G_GNUC_[A-Z_]+\b)?\s*)*)(\w+)?\s*((?:\[\S*\])*)\s*(?:G_GNUC_[A-Z_]+)?\s*[,\n]'
 502         m = re.match(regex, declaration)
 503         if m:
 504             declaration = re.sub(regex, '', declaration)
 505
 506             pre = m.group(1) or ''
 507             type = m.group(2)
 508             ptr = m.group(3) or ''
 509             name = m.group(4) or ''
 510             array = m.group(5) or ''
 511
 512             pre = re.sub(r'\s+', ' ', pre)
 513             type = re.sub(r'\s+', ' ', type)
 514             ptr = re.sub(r'\s+', ' ', ptr)
 515             ptr = re.sub(r'\s+$', '', ptr)
 516             if ptr and not ptr.endswith('*'):
 517                 ptr += ' '
 518
 519             logging.debug('"%s" "%s" "%s" "%s" "%s"', pre, type, ptr, name, array)
 520
 521             m = re.search(r'^((un)?signed .*)\s?', pre)
 522             if name == '' and m:
 523                 name = type
 524                 type = m.group(1)
 525                 pre = ''
 526
 527             if name == '':
 528                 name = 'Param' + str(param_num + 1)
 529
 530             logging.debug('"%s" "%s" "%s" "%s" "%s"', pre, type, ptr, name, array)
 531
 532             xref = typefunc(type, '<type>%s</type>' % type)
 533             result[name] = namefunc('%s%s %s%s%s' % (pre, xref, ptr, name, array))
 534             param_num += 1
 535             continue
 536
 537         # Try to match parameters which are functions
 538         #           $1                                                                  $2          $3      $4                        $5              $6            $7             $8
 539         regex = r'^(const\s+|G_CONST_RETURN\s+|G_GNUC_[A-Z_]+\s+|signed\s+|unsigned\s+)*(struct\s+)?(\w+)\s*(\**)\s*(?:restrict\b)?\s*(const\s+)?\(\s*(\*[\s\*]*)\s*(\w+)\s*\)\s*\(([^)]*)\)\s*[,\n]'
 540         m = re.match(regex, declaration)
 541         if m:
 542             declaration = re.sub(regex, '', declaration)
 543
 544             mod1 = m.group(1) or ''
 545             if m.group(2):
 546                 mod1 += m.group(2)
 547             type = m.group(3)
 548             ptr1 = m.group(4)
 549             mod2 = m.group(5) or ''
 550             func_ptr = m.group(6)
 551             name = m.group(7)
 552             func_params = m.group(8) or ''
 553
 554             if ptr1 and not ptr1.endswith('*'):
 555                 ptr1 += ' '
 556             func_ptr = re.sub(r'\s+', ' ', func_ptr)
 557
 558             logging.debug('"%s" "%s" "%s" "%s" "%s"', mod1, type, mod2, func_ptr, name)
 559
 560             xref = typefunc(type, '<type>%s</type>' % type)
 561             result[name] = namefunc('%s%s%s%s (%s%s) (%s)' % (mod1, xref, ptr1, mod2, func_ptr, name, func_params))
 562             param_num += 1
 563             continue
 564
 565         logging.warning('Cannnot parse args for function in "%s"', declaration)
 566         break
 567
 568     return result
 569
 570
 571 def ParseMacroDeclaration(declaration, namefunc):
 572     """Parse a macro declaration.
 573
 574     This function takes a macro declaration and breaks it into individual
 575     parameter declarations.
 576
 577     Args:
 578       declaration (str): the declaration to parse
 579       namefunc (func): function to apply to name
 580
 581     Returns:
 582       dict: map of (symbol, decl) pairs describing the macro
 583     """
 584
 585     result = OrderedDict()
 586
 587     logging.debug('decl=[%s]', declaration)
 588
 589     m = re.search(r'^\s*#\s*define\s+\w+\(([^\)]*)\)', declaration)
 590     if m:
 591         params = m.group(1)
 592         params = re.sub(r'\n', '', params)
 593
 594         logging.debug('params=[%s]', params)
 595
 596         for param in params.split(','):
 597             param = param.strip()
 598
 599             # Allow varargs variations
 600             if param.endswith('...'):
 601                 param = '...'
 602
 603             if param != '':
 604                 result[param] = namefunc(param)
 605
 606     return result