gtkdoc: remove shebang from module files
[gtk-doc.git] / gtkdoc-scan.in
blob688929358b96f75102bf639ec3c78acb7306b12a
1 #!@PYTHON@
2 # -*- python -*-
4 # gtk-doc - GTK DocBook documentation generator.
5 # Copyright (C) 1998  Damon Chaplin
6 #               2007-2016  Stefan Sauer
8 # This program is free software; you can redistribute it and/or modify
9 # it under the terms of the GNU General Public License as published by
10 # the Free Software Foundation; either version 2 of the License, or
11 # (at your option) any later version.
13 # This program is distributed in the hope that it will be useful,
14 # but WITHOUT ANY WARRANTY; without even the implied warranty of
15 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 # GNU General Public License for more details.
18 # You should have received a copy of the GNU General Public License
19 # along with this program; if not, write to the Free Software
20 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
23 #############################################################################
24 # Script      : gtkdoc-scan
25 # Description : Extracts declarations of functions, macros, enums, structs
26 #                and unions from header files.
28 #                It is called with a module name, an optional source directory,
29 #                an optional output directory, and the header files to scan.
31 #                It outputs all declarations found to a file named
32 #                '$MODULE-decl.txt', and the list of declarations to another
33 #                file '$MODULE-decl-list.txt'.
35 #                This second list file is typically copied to
36 #                '$MODULE-sections.txt' and organized into sections ready to
37 #                output the SGML pages.
38 #############################################################################
40 from __future__ import print_function
42 import os, sys, argparse, re
43 import logging
44 sys.path.append('@PYTHON_PACKAGE_DIR@')
46 from gtkdoc import common, config
# Do not read files twice; checking it here permits giving both srcdir and
# builddir as --source-dir without fear of duplicates.
# ScanHeader() stores the canonical (realpath) name of every header it has
# started to process as a key of this dict.
seen_headers = {}

# Command line interface of gtkdoc-scan. Run() parses the arguments with this
# parser. Help texts that span several string literals rely on implicit
# literal concatenation, so each fragment must carry its own separating
# space.
parser = argparse.ArgumentParser(
    description='gtkdoc-scan version %s - scan header files for public symbols' % config.version)
parser.add_argument('--version', action='version', version=config.version)
parser.add_argument('--module', default='', help='Name of the doc module being parsed.')
parser.add_argument('--source-dir', action='append', default=[],
                    help='Directories containing the source files to scan')
parser.add_argument('--ignore-headers', default='',
                    help='A space-separated list of header files/dirs not to scan')
parser.add_argument('--output-dir', default='.',
                    help='The directory where the results are stored')
parser.add_argument('--deprecated-guards', default='',
                    help='A |-separated list of symbols used as deprecation guards')
parser.add_argument('--ignore-decorators', default='',
                    help='A |-separated list of additional decorators in '
                    'declarations that should be ignored')
parser.add_argument('--rebuild-sections', action='store_true', default=False,
                    help='Rebuild (overwrite) the MODULE-sections.txt file')
parser.add_argument('--rebuild-types', action='store_true', default=False,
                    help='Automatically recreate the MODULE.types file using '
                    'all the *_get_type() functions found')
# Header files to scan in addition to the ones found below --source-dir.
parser.add_argument('headers', nargs='*')
#############################################################################
# Function    : Run
# Description : Entry point of the script. Parses the command line, scans
#                the headers given explicitly and the ones found below the
#                source directories, and writes the '$MODULE-decl.txt',
#                '$MODULE-decl-list.txt' and (when requested or on the very
#                first run) '$MODULE.types' files to the output directory.
# Arguments   : none - the options are taken from the command line.
# Returns     : nothing; exits with status 1 when no module name is given.
#############################################################################

def Run():
    options = parser.parse_args()
    if options.module == '':
        print('Error, missing module.')
        sys.exit(1)

    # Both options are spliced into regular expressions by ScanHeader(), so
    # replace an empty value by a pattern that cannot match anything.
    if options.deprecated_guards == '':
        options.deprecated_guards = 'does_not_match_any_cpp_symbols_at_all_nope'

    if options.ignore_decorators == '':
        options.ignore_decorators = '(?=no)match'

    # makedirs() also creates missing parent directories, which a plain
    # mkdir() would fail on.
    if not os.path.isdir(options.output_dir):
        os.makedirs(options.output_dir)

    base_filename = os.path.join(options.output_dir, options.module)
    old_decl_list = base_filename + '-decl-list.txt'
    new_decl_list = base_filename + '-decl-list.new'
    old_decl = base_filename + '-decl.txt'
    new_decl = base_filename + '-decl.new'
    old_types = base_filename + '.types'
    new_types = base_filename + '.types.new'
    sections_file = base_filename + '-sections.txt'

    # If this is the very first run then we create the .types file automatically.
    if not os.path.exists(sections_file) and not os.path.exists(old_types):
        options.rebuild_types = True

    section_list = {}
    decl_list = []
    get_types = []

    # Explicitly listed headers first, then everything below the source dirs.
    for header in options.headers:
        ScanHeader(header, section_list, decl_list, get_types, options)

    for source_dir in options.source_dir:
        ScanHeaders(source_dir, section_list, decl_list, get_types, options)

    # Sections are written sorted by key, declarations in scan order.
    with open(new_decl_list, 'w') as f:
        for section in sorted(section_list):
            f.write(section_list[section])

    with open(new_decl, 'w') as f:
        for decl in decl_list:
            f.write(decl)

    if options.rebuild_types:
        with open(new_types, 'w') as f:
            for func in sorted(get_types):
                f.write(func + '\n')

        if not get_types:
            # Nothing was found: drop the empty file and move any existing
            # types file out of the way, keeping it as a '.bak' copy.
            os.unlink(new_types)
            if os.path.exists(old_types):
                os.rename(old_types, old_types + '.bak')
        else:
            common.UpdateFileIfChanged(old_types, new_types, True)

    # NOTE(review): UpdateFileIfChanged() is defined in gtkdoc.common; judging
    # by its name it only replaces the old file when the content differs -
    # confirm there what the third argument controls.
    common.UpdateFileIfChanged(old_decl_list, new_decl_list, True)
    common.UpdateFileIfChanged(old_decl, new_decl, True)

    # If there is no MODULE-sections.txt file yet or we are asked to rebuild it,
    # we copy the MODULE-decl-list.txt file into its place. The user can tweak it
    # later if they want.
    if options.rebuild_sections or not os.path.exists(sections_file):
        common.UpdateFileIfChanged(sections_file, old_decl_list, False)

    # If there is no MODULE-overrides.txt file we create an empty one
    # because EXTRA_DIST in gtk-doc.make requires it.
    overrides_file = base_filename + '-overrides.txt'
    if not os.path.exists(overrides_file):
        open(overrides_file, 'w').close()
#############################################################################
# Function    : ScanHeaders
# Description : This scans a directory tree looking for header files.
#                Directory entries are visited in sorted order, so the
#                result does not depend on the order in which the file
#                system happens to list them.
#
# Arguments   : source_dir - the directory to scan.
#               section_list - the map of sections, handed to ScanHeader.
#               decl_list - the list of declarations, handed to ScanHeader.
#               get_types - handed to ScanHeader unchanged.
#               options - the parsed command line options; the
#                         'ignore_headers' string is used here to skip
#                         subdirectories.
#############################################################################

def ScanHeaders(source_dir, section_list, decl_list, get_types, options):
    logging.info('Scanning source directory: %s', source_dir)

    # This list holds any subdirectories found.
    subdirs = []

    # os.listdir() returns the entries in arbitrary order; sort them to get
    # reproducible output.
    for entry in sorted(os.listdir(source_dir)):
        # Skip hidden files and directories.
        if entry.startswith('.'):
            continue
        fullname = os.path.join(source_dir, entry)
        if os.path.isdir(fullname):
            subdirs.append(entry)
        elif entry.endswith('.h'):
            ScanHeader(fullname, section_list, decl_list, get_types, options)

    # Now recursively scan the subdirectories, unless their name appears as a
    # whitespace-delimited word in the list of headers/dirs to ignore.
    for subdir in subdirs:
        matchstr = r'(\s|^)' + re.escape(subdir) + r'(\s|$)'
        if re.search(matchstr, options.ignore_headers):
            continue
        ScanHeaders(os.path.join(source_dir, subdir), section_list, decl_list,
                    get_types, options)
181 #############################################################################
182 # Function    : ScanHeader
183 # Description : This scans a header file, looking for declarations of
184 #                functions, macros, typedefs, structs and unions, which it
185 #                outputs to the decl_list.
186 # Arguments   : $input_file - the header file to scan.
187 #               $section_list - a map of sections.
188 #               $decl_list - a list of declarations
189 # Returns     : it adds declarations to the appropriate list.
190 #############################################################################
192 def ScanHeader(input_file, section_list, decl_list, get_types, options):
193     global seen_headers
194     slist = []                  # Holds the resulting list of declarations.
195     title = ''                 # Holds the title of the section
196     in_comment = 0                  # True if we are in a comment.
197     in_declaration = ''          # The type of declaration we are in, e.g.
198                               #   'function' or 'macro'.
199     skip_block = 0                  # True if we should skip a block.
200     symbol=None                  # The current symbol being declared.
201     decl=''                          # Holds the declaration of the current symbol.
202     ret_type=None                  # For functions and function typedefs this
203                               #   holds the function's return type.
204     pre_previous_line = ''   # The pre-previous line read in - some Gnome
205                               #   functions have the return type on one
206                               #   line, the function name on the next,
207                               #   and the rest of the declaration after.
208     previous_line = ''          # The previous line read in - some Gnome
209                               #   functions have the return type on one line
210                               #   and the rest of the declaration after.
211     first_macro = 1          # Used to try to skip the standard #ifdef XXX
212                               #   #define XXX at the start of headers.
213     level = None                          # Used to handle structs/unions which contain
214                               #   nested structs or unions.
215     internal = 0             # Set to 1 for internal symbols, we need to
216                                 #   fully parse, but don't add them to docs
217     forward_decls = {}         # hashtable of forward declarations, we skip
218                                 #   them if we find the real declaration
219                                 #   later.
220     doc_comments = {}          # hastable of doc-comment we found. We can
221                                 # use that to put element to the right
222                                 # sction in the generated section-file
224     file_basename = None
226     deprecated_conditional_nest = 0
227     ignore_conditional_nest = 0
229     deprecated = ''
230     doc_comment = ''
232     # Don't scan headers twice
233     canonical_input_file = os.path.realpath(input_file)
234     if canonical_input_file in seen_headers:
235         logging.info('File already scanned: %s', input_file)
236         return
238     seen_headers[canonical_input_file] = 1
240     file_basename = os.path.split(input_file)[1][:-2] # filename ends in .h
242     # Check if the basename is in the list of headers to ignore.
243     matchstr = r'(\s|^)' + re.escape(file_basename) + r'\.h(\s|$)'
244     if re.search(matchstr, options.ignore_headers):
245         logging.info('File ignored: %s', input_file)
246         return
248     # Check if the full name is in the list of headers to ignore.
249     matchstr = r'(\s|^)' + re.escape(input_file) + r'(\s|$)'
250     if re.search(matchstr, options.ignore_headers):
251         logging.info('File ignored: %s', input_file)
252         return
254     if not os.path.exists(input_file):
255         logging.warning('File does not exist: %s', input_file)
256         return
258     logging.info('Scanning %s', input_file)
260     for line in open(input_file):
261         # If this is a private header, skip it.
262         if re.search(r'%^\s*/\*\s*<\s*private_header\s*>\s*\*/', line):
263             return
265         # Skip to the end of the current comment.
266         if in_comment:
267             logging.info('Comment: %s', line)
268             doc_comment += line
269             if re.search(r'\*/', line):
270                 m = re.search(r'\* ([a-zA-Z][a-zA-Z0-9_]+):/', doc_comment)
271                 if m:
272                   doc_comments[m.group(1).lower()] = 1
273                 in_comment = 0
274                 doc_comment = ''
275             continue
277         # Keep a count of #if, #ifdef, #ifndef nesting,
278         # and if we enter a deprecation-symbol-bracketed
279         # zone, take note.
280         m = re.search(r'^\s*#\s*if(?:n?def\b|\s+!?\s*defined\s*\()\s*(\w+)', line)
281         if m:
282             define_name = m.group(1)
283             if deprecated_conditional_nest < 1 and re.search(options.deprecated_guards, define_name):
284                 deprecated_conditional_nest = 1
285             elif deprecated_conditional_nest >= 1:
286                 deprecated_conditional_nest += 1
287             if ignore_conditional_nest == 0 and '__GTK_DOC_IGNORE__' in define_name:
288                 ignore_conditional_nest = 1
289             elif ignore_conditional_nest > 0:
290                 ignore_conditional_nest = 1
292         elif re.search(r'^\s*#\sif', line):
293             if deprecated_conditional_nest >= 1:
294                  deprecated_conditional_nest += 1
296             if ignore_conditional_nest > 0:
297                  ignore_conditional_nest += 1
298         elif re.search(r'^\s*#endif', line):
299             if deprecated_conditional_nest >= 1:
300                 deprecated_conditional_nest -= 1
302             if ignore_conditional_nest > 0:
303                 ignore_conditional_nest -= 1
305         # If we find a line containing _DEPRECATED, we hope that this is
306         # attribute based deprecation and also treat this as a deprecation
307         # guard, unless it's a macro definition.
308         if deprecated_conditional_nest == 0 and '_DEPRECATED' in line:
309             m = re.search(r'^\s*#\s*(if*|define)', line)
310             if not (m or in_declaration == 'enum'):
311                 logging.info('Found deprecation annotation (decl: "%s"): "%s"', in_declaration, line)
312                 deprecated_conditional_nest += 0.1
314         # set flag that is used later when we do AddSymbolToList
315         if deprecated_conditional_nest > 0:
316             deprecated = '<DEPRECATED/>\n'
317         else:
318             deprecated = ''
320         if ignore_conditional_nest:
321             continue
323         if not in_declaration:
324             # Skip top-level comments.
325             m = re.search(r'^\s*/\*', line)
326             if m:
327                 re.sub(r'^\s*/\*', '', line)
328                 if re.search(r'\*/', line):
329                     logging.info('Found one-line comment: %s', line)
330                 else:
331                     in_comment = 1
332                     doc_comment = line
333                     logging.info('Found start of comment: %s', line)
334                 continue
336             logging.info('no decl: %s', line.strip())
338             m = re.search(r'^\s*#\s*define\s+(\w+)', line)
339             #                                $1                                $3            $4             $5
340             m2 = re.search(r'^\s*typedef\s+((const\s+|G_CONST_RETURN\s+)?\w+)(\s+const)?\s*(\**)\s*\(\*\s*(\w+)\)\s*\(', line)
341             #                      $1                                $3            $4             $5
342             m3 = re.search(r'^\s*((const\s+|G_CONST_RETURN\s+)?\w+)(\s+const)?\s*(\**)\s*\(\*\s*(\w+)\)\s*\(', line)
343             #                    $1            $2
344             m4 = re.search(r'^\s*(\**)\s*\(\*\s*(\w+)\)\s*\(', line)
345             #                              $1                                $3
346             m5 = re.search(r'^\s*typedef\s*((const\s+|G_CONST_RETURN\s+)?\w+)(\s+const)?\s*', previous_line)
347             #                                                                           $1                                $3            $4             $5
348             m6 = re.search(r'^\s*(?:\b(?:extern|G_INLINE_FUNC|%s)\s*)*((const\s+|G_CONST_RETURN\s+)?\w+)(\s+const)?\s*(\**)\s*\(\*\s*(\w+)\)\s*\(' % options.ignore_decorators, line)
349             m7 = re.search(r'^\s*enum\s+_?(\w+)\s+\{', line)
350             m8 = re.search(r'^\s*typedef\s+enum', line)
351             m9 = re.search(r'^\s*typedef\s+(struct|union)\s+_(\w+)\s+\2\s*;', line)
352             m10 = re.search(r'^\s*(struct|union)\s+(\w+)\s*;', line)
353             m11 = re.search(r'^\s*typedef\s+(struct|union)\s*\w*\s*{', line)
354             m12 = re.search(r'^\s*typedef\s+(?:struct|union)\s+\w+[\s\*]+(\w+)\s*;', line)
355             m13 = re.search(r'^\s*(G_GNUC_EXTENSION\s+)?typedef\s+(.+[\s\*])(\w+)(\s*\[[^\]]+\])*\s*;', line)
356             m14 = re.search(r'^\s*(extern|[A-Za-z_]+VAR|%s)\s+((const\s+|signed\s+|unsigned\s+|long\s+|short\s+)*\w+)(\s+\*+|\*+|\s)\s*(const\s+)*([A-Za-z]\w*)\s*;' % options.ignore_decorators, line)
357             m15 = re.search(r'^\s*((const\s+|signed\s+|unsigned\s+|long\s+|short\s+)*\w+)(\s+\*+|\*+|\s)\s*(const\s+)*([A-Za-z]\w*)\s*\=', line)
358             m16 = re.search(r'.*G_DECLARE_(FINAL_TYPE|DERIVABLE_TYPE|INTERFACE)\s*\(', line)
359             #                                                          $1                                                                                                    $2                                                          $3
360             m17 = re.search(r'^\s*(?:\b(?:extern|G_INLINE_FUNC|%s)\s*)*((?:const\s+|G_CONST_RETURN\s+|signed\s+|unsigned\s+|long\s+|short\s+|struct\s+|union\s+|enum\s+)*\w+)((?:\s+|\*)+(?:\s*(?:\*+|\bconst\b|\bG_CONST_RETURN\b))*)\s*(_[A-Za-z]\w*)\s*\(' % options.ignore_decorators, line)
361             #                                                          $1                                                                                                    $2                                                          $3
362             m18 = re.search(r'^\s*(?:\b(?:extern|G_INLINE_FUNC|%s)\s*)*((?:const\s+|G_CONST_RETURN\s+|signed\s+|unsigned\s+|long\s+|short\s+|struct\s+|union\s+|enum\s+)*\w+)((?:\s+|\*)+(?:\s*(?:\*+|\bconst\b|\bG_CONST_RETURN\b))*)\s*([A-Za-z]\w*)\s*\(' % options.ignore_decorators, line)
363             m19 = re.search(r'^\s*([A-Za-z]\w*)\s*\(', line)
364             m20 = re.search(r'^\s*\(', line)
365             m21 = re.search(r'^\s*struct\s+_?(\w+)', line)
366             m22 = re.search(r'^\s*union\s+_(\w+)', line)
368             # MACROS
370             if m:
371                 symbol = m.group(1)
372                 decl = line
373                 # We assume all macros which start with '_' are private.
374                 # We also try to skip the first macro if it looks like the
375                 # standard #ifndef HEADER_FILE #define HEADER_FILE etc.
376                 # And we only want TRUE & FALSE defined in GLib.
377                 if not symbol.startswith('_') \
378                      and (not re.search(r'#ifndef\s+' + symbol, previous_line)
379                          or first_macro == 0) \
380                      and ((symbol != 'TRUE' and symbol != 'FALSE')
381                          or options.module == 'glib'):
382                     in_declaration = 'macro'
383                     logging.info('Macro: "%s"', symbol)
384                 else:
385                     logging.info('skipping Macro: "%s"', symbol)
386                     in_declaration = 'macro'
387                     internal = 1
388                 first_macro = 0
391             # TYPEDEF'D FUNCTIONS (i.e. user functions)
393             elif m2:
394                 p3 = m2.group(3) or ''
395                 ret_type = "%s%s %s" % (m2.group(1), p3, m2.group(4))
396                 symbol = m2.group(5)
397                 decl = line[m2.end():]
398                 in_declaration = 'user_function'
399                 logging.info('user function (1): "%s", Returns: "%s"', symbol, ret_type)
401             elif re.search(r'^\s*typedef\s*', previous_line) and m3:
402                 p3 = m3.group(3) or ''
403                 ret_type = '%s%s %s' % (m3.group(1), p3, m3.group(4))
404                 symbol = m3.group(5)
405                 decl = line[m3.end():]
406                 in_declaration = 'user_function'
407                 logging.info('user function (2): "%s", Returns: "%s"', symbol, ret_type)
409             elif re.search(r'^\s*typedef\s*', previous_line) and m4:
410                 ret_type = m4.group(1)
411                 symbol = m4.group(2)
412                 decl = line[m4.end():]
413                 if m5:
414                     p3 = m5.group(3) or ''
415                     ret_type = "%s%s %s" % (m5.group(1), p3, ret_type)
416                     in_declaration = 'user_function'
417                     logging.info('user function (3): "%s", Returns: "%s"', symbol, ret_type)
419             # FUNCTION POINTER VARIABLES
420             elif m6:
421                 p3 = m6.group(3) or ''
422                 ret_type = '%s%s %s' % (m6.group(1), p3, m6.group(4))
423                 symbol = m6.group(5)
424                 decl = line[m6.end():]
425                 in_declaration = 'user_function'
426                 logging.info('function pointer variable: "%s", Returns: "%s"', symbol, ret_type)
428             # ENUMS
430             elif m7:
431                 re.sub(r'^\s*enum\s+_?(\w+)\s+\{', r'enum \1 {',line)
432                 # We assume that 'enum _<enum_name> {' is really the
433                 # declaration of enum <enum_name>.
434                 symbol = m7.group(1)
435                 decl = line
436                 in_declaration = 'enum'
437                 logging.info('plain enum: "%s"', symbol)
439             elif re.search(r'^\s*typedef\s+enum\s+_?(\w+)\s+\1\s*;', line):
440                 # We skip 'typedef enum <enum_name> _<enum_name>;' as the enum will
441                 # be declared elsewhere.
442                 logging.info('skipping enum typedef: "%s"', line)
443             elif m8:
444                 symbol = ''
445                 decl = line
446                 in_declaration = 'enum'
447                 logging.info('typedef enum: -')
449             # STRUCTS AND UNIONS
451             elif m9:
452                 # We've found a 'typedef struct _<name> <name>;'
453                 # This could be an opaque data structure, so we output an
454                 # empty declaration. If the structure is actually found that
455                 # will override this.
456                 structsym = m9.group(1).upper()
457                 logging.info('%s typedef: "%s"', structsym, m9.group(2))
458                 forward_decls[m9.group(2)] = '<%s>\n<NAME>%s</NAME>\n%s</%s>\n' % (structsym, m9.group(2), deprecated, structsym)
460             elif re.search(r'^\s*(?:struct|union)\s+_(\w+)\s*;', line):
461                 # Skip private structs/unions.
462                 logging.info('private struct/union')
464             elif m10:
465                 # Do a similar thing for normal structs as for typedefs above.
466                 # But we output the declaration as well in this case, so we
467                 # can differentiate it from a typedef.
468                 structsym = m10.group(1).upper()
469                 logging.info('%s:%s', structsym, m10.group(2))
470                 forward_decls[m10.group(2)] = '<%s>\n<NAME>%s</NAME>\n%s%s</%s>\n' % (structsym, m10.group(2), line, deprecated, structsym)
472             elif m11:
473                 symbol = ''
474                 decl = line
475                 level = 0
476                 in_declaration = m11.group(1)
477                 logging.info('typedef struct/union "%s"', in_declaration)
479             # OTHER TYPEDEFS
481             elif m12:
482                 logging.info('Found struct/union(*) typedef "%s": "%s"', m12.group(1), line)
483                 if AddSymbolToList(slist, m12.group(1)):
484                     decl_list.append('<TYPEDEF>\n<NAME>%s</NAME>\n%s%s</TYPEDEF>\n' % (m12.group(1), deprecated, line))
486             elif m13:
487                 if m13.group(2).split()[0] not in ('struct', 'union'):
488                     logging.info('Found typedef: "%s"', line)
489                     if AddSymbolToList(slist, m13.group(3)):
490                         decl_list.append('<TYPEDEF>\n<NAME>%s</NAME>\n%s%s</TYPEDEF>\n' % (m13.group(3), deprecated, line))
491             elif re.search(r'^\s*typedef\s+', line):
492                 logging.info('Skipping typedef: "%s"', line)
494             # VARIABLES (extern'ed variables)
496             elif m14:
497                 symbol = m14.group(6)
498                 line = re.sub(r'^\s*([A-Za-z_]+VAR)\b', r'extern', line)
499                 decl = line
500                 logging.info('Possible extern var "%s": "%s"', symbol, decl)
501                 if AddSymbolToList(slist, symbol):
502                     decl_list.append('<VARIABLE>\n<NAME>%s</NAME>\n%s%s</VARIABLE>\n' % (symbol, deprecated, decl))
504             # VARIABLES
506             elif m15:
507                 symbol = m15.group(5)
508                 decl = line
509                 logging.info('Possible global var" %s": "%s"', symbol, decl)
510                 if AddSymbolToList(slist, symbol):
511                     decl_list.append('<VARIABLE>\n<NAME>%s</NAME>\n%s%s</VARIABLE>\n' % (symbol, deprecated, decl))
513             # G_DECLARE_*
515             elif m16:
516                 in_declaration = 'g-declare'
517                 symbol = 'G_DECLARE_' + m16.group(1)
518                 decl = line[m16.end():]
520             # FUNCTIONS
522             # We assume that functions which start with '_' are private, so
523             # we skip them.
524             elif m17:
525                 ret_type = m17.group(1)
526                 if m17.group(2):
527                     ret_type += ' ' + m17.group(2)
528                 symbol = m17.group(3)
529                 decl = line[m17.end():]
530                 logging.info('internal Function: "%s", Returns: "%s""%s"', symbol, m17.group(1), m17.group(2))
531                 in_declaration = 'function'
532                 internal = 1
533                 if line.strip().startswith('G_INLINE_FUNC'):
534                     logging.info('skip block after inline function')
535                     # now we we need to skip a whole { } block
536                     skip_block = 1
538             elif m18:
539                 ret_type = m18.group(1)
540                 if m18.group(2):
541                     ret_type += ' ' + m18.group(2)
542                 symbol = m18.group(3)
543                 decl = line[m18.end():]
544                 logging.info('Function (1): "%s", Returns: "%s""%s"', symbol, m18.group(1), m18.group(2))
545                 in_declaration = 'function'
546                 if line.strip().startswith('G_INLINE_FUNC'):
547                     logging.info('skip block after inline function')
548                     # now we we need to skip a whole { } block
549                     skip_block = 1
551             # Try to catch function declarations which have the return type on
552             # the previous line. But we don't want to catch complete functions
553             # which have been declared G_INLINE_FUNC, e.g. g_bit_nth_lsf in
554             # glib, or 'static inline' functions.
555             elif m19:
556                 symbol = m19.group(1)
557                 decl = line[m19.end():]
559                 previous_line_words = previous_line.strip().split()
561                 if not previous_line.strip().startswith('G_INLINE_FUNC'):
562                     if not previous_line_words or previous_line_words[0] != 'static':
563                         #                                           $1                                                                                                   $2
564                         pm = re.search(r'^\s*(?:\b(?:extern|%s)\s*)*((?:const\s+|G_CONST_RETURN\s+|signed\s+|unsigned\s+|long\s+|short\s+|struct\s+|union\s+|enum\s+)*\w+)((?:\s*(?:\*+|\bconst\b|\bG_CONST_RETURN\b))*)\s*$' % options.ignore_decorators, previous_line)
565                         if pm:
566                             ret_type = pm.group(1)
567                             if pm.group(2):
568                                 ret_type += ' ' + pm.group(2)
569                             logging.info('Function  (2): "%s", Returns: "%s"', symbol, ret_type)
570                             in_declaration = 'function'
571                     else:
572                         logging.info('skip block after inline function')
573                         # now we we need to skip a whole { } block
574                         skip_block = 1
575                         #                                                                                  $1                                                                                                    $2
576                         pm = re.search(r'^\s*(?:\b(?:extern|static|inline|%s)\s*)*((?:const\s+|G_CONST_RETURN\s+|signed\s+|unsigned\s+|long\s+|short\s+|struct\s+|union\s+|enum\s+)*\w+)((?:\s*(?:\*+|\bconst\b|\bG_CONST_RETURN\b))*)\s*$' % options.ignore_decorators, previous_line)
577                         if pm:
578                             ret_type = pm.group(1)
579                             if pm.group(2):
580                                 ret_type += ' ' + pm.group(2)
581                             logging.info('Function  (3): "%s", Returns: "%s"', symbol, ret_type)
582                             in_declaration = 'function'
583                 else:
584                     if not previous_line_words or previous_line_words[0] != 'static':
585                         logging.info('skip block after inline function')
586                         # now we we need to skip a whole { } block
587                         skip_block = 1
588                         #                                                                                  $1                                                                                                    $2
589                         pm = re.search(r'^\s*(?:\b(?:extern|G_INLINE_FUNC|%s)\s*)*((?:const\s+|G_CONST_RETURN\s+|signed\s+|unsigned\s+|long\s+|short\s+|struct\s+|union\s+|enum\s+)*\w+)((?:\s*(?:\*+|\bconst\b|\bG_CONST_RETURN\b))*)\s*$' % options.ignore_decorators, previous_line)
590                         if pm:
591                             ret_type = pm.group(1)
592                             if pm.group(2):
593                                 ret_type += ' ' + pm.group(2)
594                             logging.info('Function (4): "%s", Returns: "%s"', symbol, ret_type)
595                             in_declaration = 'function'
597             # Try to catch function declarations with the return type and name
598             # on the previous line(s), and the start of the parameters on this.
599             elif m20:
600                 decl = line[m20.end():]
601                 pm = re.search(r'^\s*(?:\b(?:extern|G_INLINE_FUNC|%s)\s*)*((?:const\s+|G_CONST_RETURN\s+|signed\s+|unsigned\s+|enum\s+)*\w+)(\s+\*+|\*+|\s)\s*([A-Za-z]\w*)\s*$' % options.ignore_decorators, previous_line)
602                 ppm = re.search(r'^\s*(?:\b(?:extern|G_INLINE_FUNC|%s)\s*)*((?:const\s+|G_CONST_RETURN\s+|signed\s+|unsigned\s+|struct\s+|union\s+|enum\s+)*\w+(?:\**\s+\**(?:const|G_CONST_RETURN))?(?:\s+|\s*\*+))\s*$' % options.ignore_decorators, pre_previous_line)
603                 if pm:
604                     ret_type = pm.group(1) + ' ' + pm.group(2)
605                     symbol = pm.group(3)
606                     in_declaration = 'function'
607                     logging.info('Function (5): "%s", Returns: "%s"', symbol, ret_type)
609                 elif re.search(r'^\s*\w+\s*$', previous_line) and ppm:
610                     ret_type = ppm.group(1)
611                     ret_type = re.sub(r'\s*\n', '', ret_type, flags=re.MULTILINE)
612                     in_declaration = 'function'
614                     symbol = previous_line
615                     symbol = re.sub(r'^\s+', '', symbol)
616                     symbol = re.sub(r'\s*\n', '', symbol, flags=re.MULTILINE)
617                     logging.info('Function (6): "%s", Returns: "%s"', symbol, ret_type)
619             #} elsif (m/^extern\s+/) {
620                 #print "DEBUG: Skipping extern: $_"
623             # STRUCTS
625             elif re.search(r'^\s*struct\s+_?(\w+)\s*\*', line):
626                 # Skip 'struct _<struct_name> *', since it could be a
627                 # return type on its own line.
628                 pass
629             elif m21:
630                 # We assume that 'struct _<struct_name>' is really the
631                 # declaration of struct <struct_name>.
632                 symbol = m21.group(1)
633                 decl = line
634                  # we will find the correct level, as below we add line.count('{')
635                 level = 0
636                 in_declaration = 'struct'
637                 logging.info('Struct(_): "%s"', symbol)
640             # UNIONS
642             elif re.search(r'^\s*union\s+_(\w+)\s*\*', line):
643                 # Skip 'union _<union_name> *' (see above)
644                 pass
645             elif m22:
646                 symbol = m22.group(1)
647                 decl = line
648                 level = 0
649                 in_declaration = 'union'
650                 logging.info('Union(_): "%s"', symbol)
651         else:
652             logging.info('in decl: skip=%s %s', skip_block, line.strip())
653             # If we were already in the middle of a declaration, we simply add
654             # the current line onto the end of it.
655             if skip_block == 0:
656                 decl += line
657             else:
658                 # Remove all nested pairs of curly braces.
659                 brace_remover = r'{[^{]*}'
660                 bm = re.search(brace_remover, line)
661                 while bm:
662                     line = re.sub(brace_remover, '', line)
663                     bm = re.search(brace_remover, line)
664                 # Then hope at most one remains in the line...
665                 bm = re.search(r'(.*?){', line)
666                 if bm:
667                     if skip_block == 1:
668                         decl += bm.group(1)
669                     skip_block += 1
670                 elif '}' in line:
671                     skip_block -= 1
672                     if skip_block == 1:
673                         # this is a hack to detect the end of declaration
674                         decl += ';'
675                         skip_block = 0
676                         logging.info('2: ---')
678                 else:
679                     if skip_block == 1:
680                         decl += line
682         if in_declaration == "g-declare":
683             dm = re.search(r'\s*(\w+)\s*,\s*(\w+)\s*,\s*(\w+)\s*,\s*(\w+)\s*,\s*(\w+)\s*\).*$', decl)
684             # FIXME the original code does s// stuff here and we don't. Is it necessary?
685             if dm:
686                 ModuleObjName = dm.group(1)
687                 module_obj_name = dm.group(2)
688                 if options.rebuild_types:
689                     get_types.append(module_obj_name + '_get_type')
690                 forward_decls[ModuleObjName] = '<STRUCT>\n<NAME>%s</NAME>\n%s</STRUCT>\n' % (ModuleObjName, deprecated)
691                 if symbol.startswith('G_DECLARE_DERIVABLE'):
692                     forward_decls[ModuleObjName + 'Class'] = '<STRUCT>\n<NAME>%sClass</NAME>\n%s</STRUCT>\n' % (ModuleObjName, deprecated)
693                 if symbol.startswith('G_DECLARE_INTERFACE'):
694                     forward_decls[ModuleObjName + 'Interface'] = '<STRUCT>\n<NAME>%sInterface</NAME>\n%s</STRUCT>\n' % (ModuleObjName, deprecated)
695                 in_declaration = ''
697         # Note that sometimes functions end in ') G_GNUC_PRINTF (2, 3);' or
698         # ') __attribute__ (...);'.
699         if in_declaration == 'function':
700             regex = r'\)\s*(G_GNUC_.*|.*DEPRECATED.*|%s\s*|__attribute__\s*\(.*\)\s*)*;.*$' % options.ignore_decorators
701             pm = re.search(regex, decl, flags=re.MULTILINE)
702             if pm:
703                 logging.info('scrubbing:[%s]', decl)
704                 decl = re.sub(regex, '', decl, flags=re.MULTILINE)
705                 logging.info('scrubbed:[%s]', decl)
706                 if internal == 0:
707                      decl = re.sub(r'/\*.*?\*/', '', decl, flags=re.MULTILINE)   # remove comments.
708                      decl = re.sub(r'\s*\n\s*(?!$)', ' ', decl, flags=re.MULTILINE)  # consolidate whitespace at start/end of lines.
709                      decl = decl.strip()
710                      ret_type = re.sub(r'/\*.*?\*/', '', ret_type)               # remove comments in ret type.
711                      if AddSymbolToList(slist, symbol):
712                          decl_list.append('<FUNCTION>\n<NAME>%s</NAME>\n%s<RETURNS>%s</RETURNS>\n%s\n</FUNCTION>\n' % (symbol, deprecated, ret_type, decl))
713                          if options.rebuild_types:
714                              # check if this looks like a get_type function and if so remember
715                              if symbol.endswith('_get_type') and 'GType' in ret_type and re.search(r'^(void|)$', decl):
716                                  logging.info("Adding get-type: [%s] [%s] [%s]\tfrom %s", ret_type, symbol, decl, input_file)
717                                  get_types.append(symbol)
718                 else:
719                      internal = 0
720                 deprecated_conditional_nest = int(deprecated_conditional_nest)
721                 in_declaration = ''
722                 skip_block = 0
724         if in_declaration == 'user_function':
725             if re.search(r'\).*$', decl):
726                 decl = re.sub(r'\).*$', '', decl)
727                 if AddSymbolToList(slist, symbol):
728                     decl_list.append('<USER_FUNCTION>\n<NAME>%s</NAME>\n%s<RETURNS>%s</RETURNS>\n%s</USER_FUNCTION>\n' % (symbol, deprecated, ret_type, decl))
729                 deprecated_conditional_nest = int(deprecated_conditional_nest)
730                 in_declaration = ''
732         if in_declaration == 'macro':
733             if not re.search(r'\\\s*$', decl):
734                 if internal == 0:
735                     if AddSymbolToList(slist, symbol):
736                         decl_list.append('<MACRO>\n<NAME>%s</NAME>\n%s%s</MACRO>\n' % (symbol, deprecated, decl))
737                 else:
738                     internal = 0
739                 deprecated_conditional_nest = int(deprecated_conditional_nest)
740                 in_declaration = ''
742         if in_declaration == 'enum':
743             em = re.search(r'\}\s*(\w+)?;\s*$', decl)
744             if em:
745                 if symbol == '':
746                     symbol = em.group(1)
747                 if AddSymbolToList(slist, symbol):
748                     decl_list.append('<ENUM>\n<NAME>%s</NAME>\n%s%s</ENUM>\n' % (symbol, deprecated, decl))
749                 deprecated_conditional_nest = int(deprecated_conditional_nest)
750                 in_declaration = ''
752         # We try to handle nested structs/unions, but unmatched brackets in
753         # comments will cause problems.
754         if in_declaration == 'struct' or in_declaration == 'union':
755             sm = re.search(r'\n\}\s*(\w*);\s*$', decl)
756             if level <= 1 and sm:
757                 if symbol == '':
758                     symbol = sm.group(1)
760                 bm = re.search(r'^(\S+)(Class|Iface|Interface)\b', symbol)
761                 if bm:
762                     objectname = bm.group(1)
763                     logging.info('Found object: "%s"', objectname)
764                     title = '<TITLE>%s</TITLE>' % objectname
766                 logging.info('Store struct: "%s"', symbol)
767                 if AddSymbolToList(slist, symbol):
768                     structsym = in_declaration.upper()
769                     decl_list.append('<%s>\n<NAME>%s</NAME>\n%s%s</%s>\n' % (structsym, symbol, deprecated, decl, structsym))
770                     if symbol in forward_decls:
771                         del forward_decls[symbol]
772                 deprecated_conditional_nest = int(deprecated_conditional_nest)
773                 in_declaration = ''
774             else:
775                 # We count the opening and closing braces in the line, and
776                 # adjust level accordingly.
777                 level += line.count('{')
778                 level -= line.count('}')
779                 logging.info('struct/union level : %d', level)
781         pre_previous_line = previous_line
782         previous_line = line
784     # print remaining forward declarations
785     for symbol in sorted(forward_decls.keys()):
786         if forward_decls[symbol]:
787             AddSymbolToList(slist, symbol)
788             decl_list.append(forward_decls[symbol])
790     # add title
791     slist = [title] + slist
793     logging.info("Scanning %s done", input_file)
795     # Try to separate the standard macros and functions, placing them at the
796     # end of the current section, in a subsection named 'Standard'.
797     # do this in a loop to catch object, enums and flags
798     klass = lclass = prefix = lprefix = None
799     standard_decl = []
800     liststr = '\n'.join(s for s in slist if s) + '\n'
801     while True:
802         m = re.search(r'^(\S+)_IS_(\S*)_CLASS\n', liststr, flags=re.MULTILINE)
803         m2 = re.search(r'^(\S+)_IS_(\S*)\n', liststr, flags=re.MULTILINE)
804         m3 = re.search(r'^(\S+?)_(\S*)_get_type\n', liststr, flags=re.MULTILINE)
805         if m:
806             prefix = m.group(1)
807             lprefix = prefix.lower()
808             klass = m.group(2)
809             lclass = klass.lower()
810             logging.info("Found gobject type '%s_%s' from is_class macro", prefix, klass)
811         elif m2:
812             prefix = m2.group(1)
813             lprefix = prefix.lower()
814             klass = m2.group(2)
815             lclass = klass.lower()
816             logging.info("Found gobject type '%s_%s' from is_ macro", prefix, klass)
817         elif m3:
818             lprefix = m3.group(1)
819             prefix = lprefix.upper()
820             lclass = m3.group(2)
821             klass = lclass.upper()
822             logging.info("Found gobject type '%s_%s' from get_type function", prefix, klass)
823         else:
824             break
826         cclass = lclass
827         cclass = cclass.replace('_', '')
828         mtype = lprefix + cclass
830         liststr, standard_decl = replace_once(liststr, standard_decl, r'^%sPrivate\n' % mtype)
832         # We only leave XxYy* in the normal section if they have docs
833         if mtype not in doc_comments:
834             logging.info("  Hide instance docs for %s", mtype)
835             liststr, standard_decl = replace_once(liststr, standard_decl, r'^%s\n' % mtype)
837         if mtype + 'class' not in doc_comments:
838             logging.info("  Hide class docs for %s", mtype)
839             liststr, standard_decl = replace_once(liststr, standard_decl, r'^%sClass\n' % mtype)
841         if mtype + 'interface' not in doc_comments:
842             logging.info("  Hide iface docs for %s", mtype)
843             liststr, standard_decl = replace_once(liststr, standard_decl, r'%sInterface\n' % mtype)
845         if mtype + 'iface' not in doc_comments:
846             logging.info("  Hide iface docs for " + mtype)
847             liststr, standard_decl = replace_once(liststr, standard_decl, r'%sIface\n' % mtype)
849         liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_IS_%s\n' % klass)
850         liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_TYPE_%s\n' % klass)
851         liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_%s_get_type\n' % lclass)
852         liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_%s_CLASS\n' % klass)
853         liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_IS_%s_CLASS\n' % klass)
854         liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_%s_GET_CLASS\n' % klass)
855         liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_%s_GET_IFACE\n' % klass)
856         liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_%s_GET_INTERFACE\n' % klass)
857         # We do this one last, otherwise it tends to be caught by the IS_$class macro
858         liststr, standard_decl = replace_all(liststr, standard_decl, r'^\S+_%s\n' % klass)
860     logging.info('Decl:%s---', liststr)
861     logging.info('Std :%s---', ''.join(sorted(standard_decl)))
862     if len(standard_decl):
863       # sort the symbols
864       liststr += '<SUBSECTION Standard>\n' + ''.join(sorted(standard_decl))
866     if liststr != '':
867         if file_basename not in section_list:
868             section_list[file_basename] = ''
869         section_list[file_basename] += "<SECTION>\n<FILE>%s</FILE>\n%s</SECTION>\n\n" % (file_basename, liststr)
def replace_once(liststr, standard_decl, regex):
    """Move the first match of `regex` out of `liststr` into `standard_decl`.

    The pattern is searched case-insensitively and in multi-line mode, so
    '^' anchors at the start of every line of `liststr`.

    Arguments:
      liststr       - string holding the symbol list, one symbol per line.
      standard_decl - list that collects the removed text; appended in place.
      regex         - pattern describing the text to move.

    Returns a (liststr, standard_decl) tuple; `liststr` is returned unchanged
    when nothing matches.
    """
    mre = re.search(regex, liststr, flags=re.IGNORECASE | re.MULTILINE)
    if mre:
        standard_decl.append(mre.group(0))
        # Cut out exactly the text that was recorded.  A re.sub() without a
        # count would also delete every later match (e.g. a symbol differing
        # only in case) without recording it, so the symbol would be lost.
        liststr = liststr[:mre.start()] + liststr[mre.end():]
    return liststr, standard_decl
def replace_all(liststr, standard_decl, regex):
    """Move every match of `regex` out of `liststr` into `standard_decl`.

    The pattern is searched in multi-line mode (case-sensitively), so '^'
    anchors at the start of every line of `liststr`.  Matches are removed one
    at a time and the search restarts from the beginning after each removal.

    Arguments:
      liststr       - string holding the symbol list, one symbol per line.
      standard_decl - list that collects the removed text; appended in place.
      regex         - pattern describing the text to move.

    Returns a (liststr, standard_decl) tuple.
    """
    pattern = re.compile(regex, flags=re.MULTILINE)
    mre = pattern.search(liststr)
    while mre:
        if mre.start() == mre.end():
            # An empty match removes nothing; stop instead of looping forever.
            break
        standard_decl.append(mre.group(0))
        # Remove only the match that was just recorded.  Substituting every
        # occurrence at once would delete the remaining matches from liststr
        # without ever adding them to standard_decl.
        liststr = liststr[:mre.start()] + liststr[mre.end():]
        mre = pattern.search(liststr)
    return liststr, standard_decl
888 #############################################################################
889 # Function    : AddSymbolToList
890 # Description : This adds the symbol to the list of declarations, but only if
891 #                it is not already in the list.
892 # Arguments   : slist - the list of symbols, one symbol per element.
893 #                symbol - the symbol to add to the list.
894 #############################################################################
def AddSymbolToList(slist, symbol):
    """Append `symbol` to `slist` unless it is already present.

    Returns True when the symbol was appended and False when it was already
    listed.  The False result lets callers skip writing another entry to
    -decl.txt, which avoids redeclarations (e.g. in conditional sections).
    """
    is_new = symbol not in slist
    if is_new:
        slist.append(symbol)
    return is_new
if __name__ == '__main__':
    # Script entry point: only start scanning when this file is executed
    # directly, not when it is imported.
    # Enable the next line to see the logging.info() trace of the scanner.
    #logging.basicConfig(level=logging.INFO)
    Run()