mkhtml2: add tag converters for sect2/sect3.
[gtk-doc.git] / gtkdoc-depscan.in
blob 9bfaf302e5e315159547b2cd4beabae192319a27
1 #!@PYTHON@
3 from __future__ import print_function
5 import gzip, os.path, re
7 from os import environ, popen, walk
8 from optparse import OptionParser
9 from sys import stderr
11 from xml.sax import ContentHandler, make_parser
12 from xml.sax.handler import feature_external_ges
# Devhelp books that are scanned when no --book option is given.
default_books = [
    'atk', 'gdk', 'gdk-pixbuf', 'glib', 'gio', 'gobject', 'gtk', 'pango',
]

# Matches one C block comment; DOTALL lets '.' span the line breaks inside it.
__comment_regex = re.compile(r'/\*.*?\*/', re.DOTALL)

# Matches a single C identifier.
__word_regex = re.compile(r'\b[A-Za-z_][A-Za-z0-9_]*\b')

# Translation table replacing typographic quotes by their ASCII counterparts.
u2a_table = {
    0x2018: 0x27,  # left single quotation mark  -> apostrophe
    0x2019: 0x27,  # right single quotation mark -> apostrophe
    0x201C: 0x22,  # left double quotation mark  -> quotation mark
    0x201D: 0x22,  # right double quotation mark -> quotation mark
}
class Book(object):
    """A devhelp book found in one of the documentation folders.

    The catalog file is located when the object is created. Its content is
    parsed lazily, on first access to ``symbols`` or ``title``, and parsed
    again whenever the file's modification time has increased since.
    """

    def __init__(self, name, folders, version=None):
        """Locate the catalog of the book called *name*.

        Each entry of *folders* is checked, in order, for the files
        ``<folder>/<name>/<name>.devhelp2`` and ``...devhelp2.gz``. The
        first folder containing at least one of them wins; if it contains
        both, the more recently modified file is used.

        *version* is stored unchanged and used by find_requirements().

        Raises IOError when none of the folders contains a catalog.
        """
        self.__catalog   = None
        self.__name      = name
        self.__symbols   = None
        self.__timestamp = 0
        self.__title     = None
        self.__version   = version

        for folder in folders:
            candidates = [
                os.path.join(folder, name, pattern % name)
                for pattern in ('%s.devhelp2', '%s.devhelp2.gz')]

            # Build a real, sorted list: the result of map() cannot be
            # sorted in place on Python 3.
            catalogs = sorted(
                (os.path.getmtime(path), path)
                for path in candidates if os.path.isfile(path))

            if catalogs:
                self.__catalog = catalogs[-1][1]
                break

        if not self.__catalog:
            raise IOError('No devhelp book found for "%s"' % name)

    def __cmp__(self, other):
        """Three-way comparison by name; objects of other types compare equal."""
        if isinstance(other, Book):
            return (self.name > other.name) - (self.name < other.name)

        return 0

    # Python 3 never calls __cmp__, so the rich comparisons are spelled out.
    # They return NotImplemented for foreign types, which lets Python 2 fall
    # back to __cmp__ and Python 3 to its default behaviour.
    def __eq__(self, other):
        return self.name == other.name if isinstance(other, Book) else NotImplemented

    def __ne__(self, other):
        return self.name != other.name if isinstance(other, Book) else NotImplemented

    def __lt__(self, other):
        return self.name < other.name if isinstance(other, Book) else NotImplemented

    def __le__(self, other):
        return self.name <= other.name if isinstance(other, Book) else NotImplemented

    def __gt__(self, other):
        return self.name > other.name if isinstance(other, Book) else NotImplemented

    def __ge__(self, other):
        return self.name >= other.name if isinstance(other, Book) else NotImplemented

    def __hash__(self):
        # Defining __eq__ removes the inherited hash on Python 3; hash by
        # name so that equal books hash alike.
        return hash(self.__name)

    def __repr__(self):
        return '<Book name="%s">' % self.__name

    def parse(self):
        """Load title and symbols from the catalog unless they are current.

        Nothing happens when the catalog was parsed before and its
        modification time has not increased. The cached state is replaced
        only after the catalog was read successfully.
        """
        timestamp = os.path.getmtime(self.__catalog)

        # "is not None" rather than truthiness: a catalog without any symbol
        # is parsed once, not on every access.
        if self.__symbols is not None and timestamp <= self.__timestamp:
            return

        class DevhelpContentHandler(ContentHandler):
            """Collects the book title and one Symbol per <keyword> element."""

            def __init__(self, book, symbols):
                ContentHandler.__init__(self)
                # Stays None when the catalog has no <book> element
                self.title = None
                self._book = book
                self._symbols = symbols

            def startElement(self, name, attrs):
                if 'book' == name:
                    self.title = attrs.get('title')
                elif 'keyword' == name:
                    symbol = Symbol.from_xml(self._book, attrs)
                    if symbol:
                        self._symbols[symbol.name] = symbol

        symbols = dict()
        handler = DevhelpContentHandler(self, symbols)

        parser = make_parser()
        parser.setFeature(feature_external_ges, False)
        parser.setContentHandler(handler)

        # Binary mode leaves decoding to the XML parser; the stream is
        # closed even when parsing fails.
        opener = gzip.open if self.__catalog.endswith('.gz') else open
        stream = opener(self.__catalog, 'rb')

        try:
            parser.parse(stream)
        finally:
            stream.close()

        self.__symbols, self.__timestamp = symbols, timestamp
        self.__title = handler.title

    def _get_symbols(self):
        self.parse()
        return self.__symbols

    def _get_title(self):
        self.parse()
        return self.__title

    def find_requirements(self):
        """Group the matched symbols that are newer than the book's version.

        Returns a dict mapping each ``since`` version to the list of symbols
        which have at least one match and were introduced after
        ``self.version``. Without a version every matched symbol carrying a
        ``since`` value is included.
        """
        requirements = dict()

        for symbol in self.symbols.values():
            if not symbol.matches or not symbol.since:
                continue

            if self.version is None or symbol.since > self.version:
                requirements.setdefault(symbol.since, []).append(symbol)

        return requirements

    catalog = property(lambda self: self.__catalog)
    name    = property(lambda self: self.__name)
    version = property(lambda self: self.__version)

    symbols = property(_get_symbols)
    title   = property(_get_title)
class Symbol(object):
    """A documented symbol listed as <keyword> in a devhelp catalog."""

    known_attributes = ('name', 'type', 'link', 'deprecated', 'since')

    class _Ordered(object):
        """Derives the rich comparisons from __cmp__.

        Python 3 ignores __cmp__, so classes ordering themselves through it
        need these methods to be comparable and sortable there.
        """

        def __eq__(self, other): return self.__cmp__(other) == 0
        def __ne__(self, other): return self.__cmp__(other) != 0
        def __lt__(self, other): return self.__cmp__(other) < 0
        def __le__(self, other): return self.__cmp__(other) <= 0
        def __gt__(self, other): return self.__cmp__(other) > 0
        def __ge__(self, other): return self.__cmp__(other) >= 0

    class DeprecationInfo(_Ordered):
        """Deprecation note: an optional version followed by free text."""

        def __init__(self, text):
            """Split *text* of the form ``VERSION: details``.

            When there is no colon, or the part before the first colon
            holds no version number, the entire text becomes the details
            and the version is None.
            """
            version, details = None, text

            if ':' in text:
                head, tail = text.split(':', 1)

                try:
                    version, details = Symbol.VersionInfo(head), tail
                except ValueError:
                    # The colon belongs to the prose, not to a version prefix
                    pass

            self.__version = version
            self.__details = details.strip()

        def __cmp__(self, other):
            """Order by version; a missing version sorts before any version.

            Accepts another DeprecationInfo or a VersionInfo. Any other
            object is considered smaller.
            """
            if isinstance(other, Symbol.DeprecationInfo):
                other = other.version
            elif not isinstance(other, Symbol.VersionInfo):
                return 1

            mine = self.version

            if mine is None or other is None:
                return (mine is not None) - (other is not None)

            return mine.__cmp__(other)

        def __hash__(self):
            # Keeps instances hashable on Python 3 despite the __eq__ above
            return hash(self.__version)

        def __str__(self):
            if not self.__version:
                return str(self.__details) if self.__details else 'Deprecated'

            if self.__details:
                return 'Since %s: %s' % (self.__version, self.__details)

            return 'Since %s' % self.__version

        details = property(lambda self: self.__details)
        version = property(lambda self: self.__version)

    class VersionInfo(_Ordered):
        """A dotted version number such as ``2.4`` or ``GTK 2.10.1``."""

        def __init__(self, text):
            """Parse the dotted number following an optional leading word.

            Raises ValueError when *text* does not start with such a number.
            """
            match = re.match(r'^\w*\s*((?:\d+\.)*\d+)', text)

            if match is None:
                raise ValueError('No version number found in "%s"' % text)

            # A real list: len() and reversed() reject the iterator that
            # map() returns on Python 3.
            self.__numbers = [int(part) for part in match.group(1).split('.')]

            # Fold the components, last one first, into a single integer
            digest = 0
            for number in reversed(self.__numbers):
                digest = digest * 1000 + number

            self.__hash = digest
            self.__text = text.strip()

        def __get_number(self, index):
            if len(self.__numbers) > index:
                return self.__numbers[index]

            return 0

        def __cmp__(self, other):
            """Compare the number lists; any non-version object is smaller."""
            if isinstance(other, Symbol.VersionInfo):
                mine, theirs = self.numbers, other.numbers
                return (mine > theirs) - (mine < theirs)

            return 1

        def __hash__(self):
            return self.__hash

        def __repr__(self):
            return '.'.join(map(str, self.__numbers))

        major   = property(lambda self: self.__get_number(0))
        minor   = property(lambda self: self.__get_number(1))
        patch   = property(lambda self: self.__get_number(2))
        numbers = property(lambda self: self.__numbers)
        text    = property(lambda self: self.__text)

    @classmethod
    def from_xml(cls, book, attrs):
        """Create a symbol from the attributes of a <keyword> element.

        Returns None when the element has no name or when no symbol type
        can be determined. A warning is written to stderr for names that
        still contain a space after normalization and for ``since`` values
        that hold no version number.
        """
        name, kind, link, deprecated, since = [
            attrs.get(key) for key in cls.known_attributes]

        if not name:
            return None

        name = name.strip().translate(u2a_table)

        if name.endswith('()'):
            if kind not in ('function', 'macro'):
                kind = 'function' if name[0].islower() else 'macro'

            name = name[:-2].strip()

        words = name.split(' ')

        if len(words) > 1:
            if words[0] in ('enum', 'struct', 'union'):
                if not kind:
                    kind = words[0]

                name = name[len(words[0]):].strip()

            elif words[-1] in ('property', 'signal'):
                # Expected shape: The "member" property / The "member" signal
                assert 'The' == words[0]
                owner = link.split('#', 1)[1].split('-', 1)[0]
                member = name.split('"')[1]

                # NOTE(review): GObject documentation usually writes
                # properties as Owner:name and signals as Owner::name, the
                # opposite of the separators used here. Confirm before
                # changing: these names serve as dictionary keys.
                if 'property' == words[-1]:
                    kind, name = 'property', '%s::%s' % (owner, member)
                else:
                    kind, name = 'signal', '%s:%s' % (owner, member)

        if not kind:
            return None

        if deprecated is not None:
            deprecated = cls.DeprecationInfo(deprecated)

        if since:
            try:
                since = cls.VersionInfo(since)
            except ValueError as e:
                print('WARNING: %s (symbol "%s" in %s).' % (
                      e, name, book.name), file=stderr)
                since = None

        if name.count(' '):
            # print() with file=: "print >>stderr" is no statement anymore
            # once print_function is in effect and fails at run time.
            print('WARNING: Malformed symbol name: "%s" (type=%s) in %s.' % (
                  name, kind, book.name), file=stderr)

        return cls(book, name, kind, link, deprecated, since)

    def __init__(self, book, name, type, link=None, deprecated=None, since=None):
        self.__book       = book
        self.__name       = name
        self.__type       = type
        self.__link       = link
        self.__deprecated = deprecated
        self.__since      = since
        # (filename, lineno, symbol) tuples appended by the source scanner
        self.__matches    = []

    def __repr__(self):
        return (
            '<Symbol: %s, type=%s, since=%s, deprecated=%s>' % (
            self.name, self.type, self.since, self.deprecated))

    book       = property(lambda self: self.__book)
    name       = property(lambda self: self.__name)
    type       = property(lambda self: self.__type)
    link       = property(lambda self: self.__link)
    deprecated = property(lambda self: self.__deprecated)
    matches    = property(lambda self: self.__matches)
    since      = property(lambda self: self.__since)
def parse_cmdline():
    """Parse the command line and return optparse's (options, args) pair."""
    parser = OptionParser(version="@VERSION@")

    # Options taking a value; each one may be repeated
    parser.add_option(
        '-b', '--book',
        action='append', dest='books', default=[],
        help='name of a devhelp book to consider')
    parser.add_option(
        '-d', '--html-dir',
        action='append', dest='dirs', default=[], metavar='PATH',
        help='path of additional folders with devhelp books')

    # Plain switches, all off by default
    parser.add_option(
        '-s', '--summarize',
        action='store_true', dest='summarize', default=False,
        help='print only a brief summary')
    parser.add_option(
        '-u', '--list-unknown',
        action='store_true', dest='unknown', default=False,
        help='list symbols not found in any book')
    parser.add_option(
        '-v', '--verbose',
        action='store_true', default=False,
        help='print additional information')

    return parser.parse_args()
def merge_gnome_path(options):
    """Append the documentation folders of all known prefixes to options.dirs.

    The prefixes are the one reported by pkg-config for glib-2.0, followed
    by the colon separated entries of the GNOME2_PATH environment variable.
    For each prefix the folders ``share/devhelp/books`` and
    ``share/gtk-doc/html`` are added, provided they exist.

    Nothing is added, and no error is raised, when no prefix is known.
    """
    prefixes = (environ.get('GNOME2_PATH') or '').split(':')

    pipe = popen('@PKG_CONFIG@ --variable=prefix glib-2.0')

    try:
        # Empty when pkg-config is missing or does not know glib-2.0
        prefixes.insert(0, pipe.readline().rstrip())
    finally:
        pipe.close()

    # Flatten with a plain loop: reduce() is no builtin on Python 3 and
    # raises TypeError for an empty sequence.
    folders = []

    for prefix in prefixes:
        prefix = prefix.strip()

        if prefix:
            folders.append(os.path.join(prefix, 'share', 'devhelp', 'books'))
            folders.append(os.path.join(prefix, 'share', 'gtk-doc', 'html'))

    options.dirs += [folder for folder in folders if os.path.isdir(folder)]
def summarize_matches(matches):
    """Count the matches per required version and per symbol.

    *matches* holds (filename, lineno, symbol) tuples. Entries whose third
    item is no Symbol are skipped, and printed if the global options ask
    for verbose output.

    Returns a dict mapping '<book name>-<since>' to a list of
    (symbol name, count) pairs, most frequent symbol first.
    """
    tally = {}

    for filename, lineno, symbol in matches:
        if isinstance(symbol, Symbol):
            requirement = '%s-%s' % (symbol.book.name, symbol.since)
            per_symbol = tally.setdefault(requirement, {})
            per_symbol[symbol.name] = per_symbol.get(symbol.name, 0) + 1
        elif options.verbose:
            print('%s:%d: unknown symbol %s' % (filename, lineno, symbol))

    # Negated count as key: descending order, ties keep their dict order
    return dict(
        (requirement, sorted(stats.items(), key=lambda entry: -entry[1]))
        for requirement, stats in tally.items())
if '__main__' == __name__:
    # Scan the given C sources for symbols of the selected devhelp books and
    # report, per book, the uses that require the newest library version.
    options, args = parse_cmdline()

    merge_gnome_path(options)

    if not options.books:
        options.books = default_books

    def trace(message, *args):
        """Print a %-formatted progress message when --verbose was given."""
        if options.verbose:
            print(message % args)

    def parse_book(name):
        """Turn a 'NAME[:VERSION]' book option into a (name, Book) pair.

        Returns None, after printing a warning to stderr, when no devhelp
        book of that name is found.
        """
        try:
            match = re.match(r'^(.*?)(?::(\d+(?:\.\d+)*))?$', name)
            name, version = match.groups()
            trace('reading book: %s', name)

            version = version and Symbol.VersionInfo(version)
            return name, Book(name, options.dirs, version)

        except IOError as e:
            # print() with file=: "print >>stderr" fails at run time once
            # print_function is in effect.
            print('WARNING: %s.' % e, file=stderr)
            return None

    def blank_comment(match):
        """Reduce a comment to the line breaks it contains.

        Keeping the line breaks keeps the line numbers of the code that
        follows a multi-line comment correct.
        """
        return '\n' * match.group(0).count('\n')

    def scan_source_file(name):
        """Record every known, and optionally unknown, symbol used in a file.

        Known symbols get a (filename, lineno, symbol) tuple appended to
        their matches. With --list-unknown, words containing an underscore
        past the first character are collected in unknown_symbols. Line
        numbers start at 1. Unreadable files are reported on stderr and
        skipped.
        """
        try:
            # Bytes plus lenient decoding: sources in other encodings must
            # not abort the scan; identifiers are ASCII anyway.
            with open(name, 'rb') as source:
                contents = source.read().decode('utf-8', 'replace')

        except IOError as e:
            print(e, file=stderr)
            return

        if not contents:
            return

        trace('scanning: %s', name)
        contents = __comment_regex.sub(blank_comment, contents)

        for lineno, line in enumerate(contents.split('\n'), 1):
            for word in __word_regex.findall(line):
                symbol = symbols.get(word)

                if symbol:
                    symbol.matches.append((name, lineno, symbol))

                elif options.unknown and word.find('_') > 0:
                    unknown_symbols.append((name, lineno, word))

    def match_order(match):
        """Sort key: file, line, then symbol name.

        Comparing by name avoids ordering Symbol objects against each other
        or against the plain strings of unknown symbols, which Python 3
        rejects.
        """
        filename, lineno, symbol = match

        if isinstance(symbol, Symbol):
            symbol = symbol.name

        return filename, lineno, symbol

    unknown_symbols = []
    symbols = dict()
    books = dict(
        pair for pair in map(parse_book, set(options.books)) if pair)

    for book in books.values():
        symbols.update(book.symbols)

    for name in args:
        if os.path.isdir(name):
            for path, dirs, files in walk(name):
                for f in files:
                    if f.endswith('.c'):
                        scan_source_file(os.path.join(path, f))

        else:
            scan_source_file(name)

    matches = []

    for book in books.values():
        requirements = book.find_requirements()

        if requirements:
            # Only the uses demanding the newest version are reported
            for symbol in requirements[max(requirements)]:
                matches += symbol.matches

    if options.unknown:
        matches += unknown_symbols

    matches.sort(key=match_order)

    if options.summarize:
        summary = summarize_matches(matches)
        for since in sorted(summary.keys()):
            print('%s required for' % since)
            for x in summary[since]:
                print('    %u %s' % (x[1], x[0]))
    else:
        for filename, lineno, symbol in matches:
            if isinstance(symbol, Symbol):
                details = filename, lineno, symbol.book.name, symbol.since, symbol.name
                print('%s:%d: %s-%s required for %s' % details)

            elif options.verbose:
                print('%s:%d: unknown symbol %s' % (filename, lineno, symbol))

    if options.unknown:
        unknown = sorted(set(
            m[2].split('_')[0].lower() for m in unknown_symbols))

        print('unknown prefixes: %s' % ', '.join(unknown))

    # Exit status 1 signals that at least one match was reported
    raise SystemExit(1 if matches else 0)