common: return dicts not tuples
[gtk-doc.git] / gtkdoc-depscan.in
blobaccc7d03cb668e43df17ceaa6b1b1e5c6e50bf92
1 #!@PYTHON@
3 import gzip, os.path, re
5 from os import environ, popen, walk
6 from optparse import OptionParser
7 from sys import stderr
9 from xml.sax import ContentHandler, make_parser
10 from xml.sax.handler import feature_external_ges
# Devhelp books that are consulted when no --book option is given.
default_books = [
    'atk',
    'gdk',
    'gdk-pixbuf',
    'glib',
    'gio',
    'gobject',
    'gtk',
    'pango',
]

# Matches one C block comment; DOTALL lets a comment span several lines.
__comment_regex = re.compile(r'/\*.*?\*/', re.DOTALL)

# Matches one C identifier.
__word_regex = re.compile(r'\b[A-Za-z_][A-Za-z0-9_]*\b')

# Translation table (code point -> code point) that turns typographic
# single and double quotes into their plain ASCII counterparts.
u2a_table = {
    0x2018: 0x27,
    0x2019: 0x27,
    0x201C: 0x22,
    0x201D: 0x22,
}
class Book(object):
    """A devhelp book (``<name>.devhelp2`` or ``<name>.devhelp2.gz``).

    The catalog file is located at construction time; its contents are
    parsed lazily the first time ``symbols`` or ``title`` is read, and
    parsed again whenever the file's modification time increases.
    """

    class _CatalogHandler(ContentHandler):
        """SAX handler collecting the book title and its keyword symbols."""

        def __init__(self, book, symbols):
            ContentHandler.__init__(self)
            # Stays None when the catalog has no <book> element.
            self.title = None
            self._book = book
            self._symbols = symbols

        def startElement(self, name, attrs):
            if 'book' == name:
                self.title = attrs.get('title')
            elif 'keyword' == name:
                symbol = Symbol.from_xml(self._book, attrs)
                if symbol:
                    self._symbols[symbol.name] = symbol

    def __init__(self, name, folders, version=None):
        """Locate the catalog of book *name*.

        *folders* are searched in order; inside the first folder that
        holds a catalog, the most recently modified of the plain and the
        gzip-compressed file is used.  *version* is stored as given and
        is later compared against the ``since`` of each symbol.

        Raises IOError when no folder contains a catalog for *name*.
        """
        self.__catalog   = None
        self.__name      = name
        self.__symbols   = None
        self.__timestamp = 0
        self.__title     = None
        self.__version   = version

        for folder in folders:
            candidates = [
                os.path.join(folder, name, pattern % name)
                for pattern in ('%s.devhelp2', '%s.devhelp2.gz')]
            dated = [
                (os.path.getmtime(path), path)
                for path in candidates if os.path.isfile(path)]

            if dated:
                # Newest file wins; equal mtimes fall back to path order.
                self.__catalog = max(dated)[1]
                break

        if not self.__catalog:
            raise IOError('No devhelp book found for "%s"' % name)

    def __cmp__(self, other):
        """Order books by name (Python 2 comparison protocol).

        NOTE(review): any non-Book object compares as equal (0); kept
        as-is for compatibility.
        """
        if isinstance(other, Book):
            return (self.name > other.name) - (self.name < other.name)

        return 0

    def __repr__(self):
        return '<Book name="%s">' % self.__name

    def parse(self):
        """(Re)read the catalog unless the cached data is still current.

        The cache is only replaced after the whole catalog was parsed
        successfully, so a failing parse never leaves a partial symbol
        table behind.
        """
        timestamp = os.path.getmtime(self.__catalog)

        if self.__symbols is not None and timestamp <= self.__timestamp:
            return

        symbols = dict()
        handler = Book._CatalogHandler(self, symbols)

        parser = make_parser()
        parser.setFeature(feature_external_ges, False)
        parser.setContentHandler(handler)

        if self.__catalog.endswith('.gz'):
            stream = gzip.open(self.__catalog)
        else:
            stream = open(self.__catalog, 'rb')

        try:
            parser.parse(stream)
        finally:
            # Closing twice is harmless if the parser closed it already.
            stream.close()

        self.__symbols, self.__timestamp = symbols, timestamp
        self.__title = handler.title

    def _get_symbols(self):
        self.parse()
        return self.__symbols

    def _get_title(self):
        self.parse()
        return self.__title

    def find_requirements(self):
        """Group the matched symbols that are newer than this book's version.

        Returns a dict mapping each ``since`` value to the list of symbols
        which have at least one recorded match and whose ``since`` is
        greater than ``self.version``.  Without a version every matched
        symbol that has a ``since`` is included.
        """
        requirements = dict()

        for symbol in self.symbols.values():
            if not symbol.matches or not symbol.since:
                continue

            if self.version is None or symbol.since > self.version:
                requirements.setdefault(symbol.since, []).append(symbol)

        return requirements

    catalog = property(lambda self: self.__catalog)
    name    = property(lambda self: self.__name)
    version = property(lambda self: self.__version)

    symbols = property(_get_symbols)
    title   = property(_get_title)
class Symbol(object):
    """An API symbol described by a ``<keyword>`` element of a devhelp book.

    Besides its name, type and link a symbol carries optional ``since``
    and ``deprecated`` information and the list of places (``matches``)
    where it was found in scanned sources.
    """

    known_attributes = ('name', 'type', 'link', 'deprecated', 'since')

    class _Ordered(object):
        """Derives the rich comparison operators from ``__cmp__``.

        ``__cmp__`` alone is only honoured by Python 2; spelling out the
        operators keeps ordering and equality working on Python 3 too.
        """

        # Defining __eq__ would otherwise disable hashing on Python 3.
        __hash__ = object.__hash__

        def __eq__(self, other): return self.__cmp__(other) == 0
        def __ne__(self, other): return self.__cmp__(other) != 0
        def __lt__(self, other): return self.__cmp__(other) < 0
        def __le__(self, other): return self.__cmp__(other) <= 0
        def __gt__(self, other): return self.__cmp__(other) > 0
        def __ge__(self, other): return self.__cmp__(other) >= 0

        @staticmethod
        def _three_way(left, right):
            """Return -1, 0 or 1 like the Python 2 builtin ``cmp()``."""
            if left == right:
                return 0

            return 1 if left > right else -1

    class DeprecationInfo(_Ordered):
        """Deprecation note of the form ``"<version>: <details>"``.

        When the text before the first colon is not a version number the
        whole text is kept as details and ``version`` is None.
        """

        def __init__(self, text):
            version, details = None, text

            if ':' in text:
                head, tail = text.split(':', 1)

                try:
                    version, details = Symbol.VersionInfo(head), tail
                except ValueError:
                    # A colon in free text, e.g. "Note: use foo() instead".
                    pass

            self.__version = version
            self.__details = details.strip()

        def __cmp__(self, other):
            if isinstance(other, Symbol.DeprecationInfo):
                return self._three_way(self.version, other.version)

            if isinstance(other, Symbol.VersionInfo):
                return self._three_way(self.version, other)

            return 1

        def __str__(self):
            if not self.__version:
                return str(self.__details) if self.__details else 'Deprecated'

            if self.__details:
                return 'Since %s: %s' % (self.__version, self.__details)

            return 'Since %s' % self.__version

        details = property(lambda self: self.__details)
        version = property(lambda self: self.__version)

    class VersionInfo(_Ordered):
        """Dotted version number, optionally preceded by a single word.

        Versions are ordered by their list of numeric components; a
        version compares greater than any object of another type.
        """

        def __init__(self, text):
            """Parse *text*; raises ValueError if it holds no version number."""
            match = re.match(r'^\w*\s*((?:\d+\.)*\d+)', text)

            if match is None:
                raise ValueError('No version number found in "%s"' % text)

            self.__numbers = [int(part) for part in match.group(1).split('.')]

            # Same value the original reduce() over the reversed numbers gave.
            digest = 0
            for number in reversed(self.__numbers):
                digest = digest * 1000 + number

            self.__hash = digest
            self.__text = text.strip()

        def __get_number(self, index):
            if len(self.__numbers) > index:
                return self.__numbers[index]

            return 0

        def __cmp__(self, other):
            if isinstance(other, Symbol.VersionInfo):
                return self._three_way(self.numbers, other.numbers)

            return 1

        def __hash__(self):
            return self.__hash

        def __repr__(self):
            return '.'.join(map(str, self.__numbers))

        major   = property(lambda self: self.__get_number(0))
        minor   = property(lambda self: self.__get_number(1))
        patch   = property(lambda self: self.__get_number(2))
        numbers = property(lambda self: self.__numbers)
        text    = property(lambda self: self.__text)

    @staticmethod
    def _qualify_member(link, title, separator):
        """Build ``Owner<separator>member`` for a property or signal title.

        *title* is expected to look like ``The "member" property``; the
        owner is the part of the *link* anchor before its first dash.
        Returns None when title or link do not have that shape.
        """
        quoted = title.split('"')

        if (link is None or '#' not in link or len(quoted) < 2
                or 'The' != title.split(' ')[0]):
            return None

        owner = link.split('#', 1)[1].split('-', 1)[0]
        return '%s%s%s' % (owner, separator, quoted[1])

    @classmethod
    def from_xml(cls, book, attrs):
        """Create a Symbol from the attributes of a ``<keyword>`` element.

        *attrs* only needs a ``get()`` method.  Returns None when the
        element has no name or when no symbol type can be determined.
        Malformed names and unparsable ``since`` values are reported on
        stderr instead of aborting the whole catalog.
        """
        name, kind, link, deprecated, since = [
            attrs.get(key) for key in Symbol.known_attributes]

        if name is None:
            return None

        name = name.strip().translate(u2a_table)

        if name.endswith('()'):
            if kind not in ('function', 'macro'):
                kind = 'function' if name[0].islower() else 'macro'

            name = name[:-2].strip()

        words = name.split(' ')

        if len(words) > 1:
            if words[0] in ('enum', 'struct', 'union'):
                if not kind: kind = words[0]
                name = name[len(words[0]):].strip()
            elif 'property' == words[-1]:
                # NOTE(review): '::' for properties and ':' for signals is
                # the reverse of the usual GObject notation -- confirm
                # before changing, the names are used as lookup keys.
                qualified = Symbol._qualify_member(link, name, '::')
                if qualified: kind, name = 'property', qualified
            elif 'signal' == words[-1]:
                qualified = Symbol._qualify_member(link, name, ':')
                if qualified: kind, name = 'signal', qualified

        if not kind: return None

        if deprecated is not None:
            deprecated = Symbol.DeprecationInfo(deprecated)

        if since:
            try:
                since = Symbol.VersionInfo(since)
            except ValueError:
                stderr.write(
                    'WARNING: Malformed version: "%s" for symbol "%s" in %s.\n' % (
                    since, name, book.name))
                since = None

        if name.count(' '):
            stderr.write(
                'WARNING: Malformed symbol name: "%s" (type=%s) in %s.\n' % (
                name, kind, book.name))

        return Symbol(book, name, kind, link, deprecated, since)

    def __init__(self, book, name, type, link=None, deprecated=None, since=None):
        self.__book       = book
        self.__name       = name
        self.__type       = type
        self.__link       = link
        self.__deprecated = deprecated
        self.__since      = since
        # (filename, lineno, symbol) tuples appended by the source scanner.
        self.__matches    = []

    def __repr__(self):
        return (
            '<Symbol: %s, type=%s, since=%s, deprecated=%s>' % (
            self.name, self.type, self.since, self.deprecated))

    book       = property(lambda self: self.__book)
    name       = property(lambda self: self.__name)
    type       = property(lambda self: self.__type)
    link       = property(lambda self: self.__link)
    deprecated = property(lambda self: self.__deprecated)
    matches    = property(lambda self: self.__matches)
    since      = property(lambda self: self.__since)
def parse_cmdline():
    """Parse the process command line.

    Returns the ``(options, args)`` pair produced by
    ``OptionParser.parse_args()``: ``options`` carries ``books``, ``dirs``,
    ``summarize``, ``unknown`` and ``verbose``; ``args`` holds the
    remaining positional arguments.
    """
    parser = OptionParser(version="@VERSION@")

    # Repeatable options; every occurrence is appended to a list.
    parser.add_option(
        '-b', '--book',
        action='append', default=[], dest='books',
        help='name of a devhelp book to consider')
    parser.add_option(
        '-d', '--html-dir',
        action='append', default=[], dest='dirs', metavar='PATH',
        help='path of additional folders with devhelp books')

    # Plain on/off switches, all disabled by default.
    switches = (
        ('-s', '--summarize', 'summarize', 'print only a brief summary'),
        ('-u', '--list-unknown', 'unknown', 'list symbols not found in any book'),
        ('-v', '--verbose', 'verbose', 'print additional information'),
    )

    for short_flag, long_flag, dest, help_text in switches:
        parser.add_option(
            short_flag, long_flag,
            action='store_true', default=False, dest=dest, help=help_text)

    return parser.parse_args()
def merge_gnome_path(options):
    """Append the standard documentation folders to ``options.dirs``.

    Installation prefixes are taken from the output of
    ``pkg-config --variable=prefix glib-2.0`` (searched first) and from
    the colon separated ``GNOME2_PATH`` environment variable.  For every
    prefix the folders ``share/devhelp/books`` and ``share/gtk-doc/html``
    are added if they exist.  Nothing is added, and no error is raised,
    when no prefix is known at all.
    """
    gnome_path = environ.get('GNOME2_PATH')
    prefixes = gnome_path.split(':') if gnome_path else []

    pipe = popen('@PKG_CONFIG@ --variable=prefix glib-2.0')
    try:
        # Empty when pkg-config is unavailable or glib-2.0 is unknown.
        glib_prefix = pipe.readline().rstrip()
    finally:
        pipe.close()

    prefixes.insert(0, glib_prefix)

    folders = []
    for prefix in prefixes:
        prefix = prefix.strip()
        if not prefix:
            continue

        folders.append(os.path.join(prefix, 'share', 'devhelp', 'books'))
        folders.append(os.path.join(prefix, 'share', 'gtk-doc', 'html'))

    options.dirs += [folder for folder in folders if os.path.isdir(folder)]
def summarize_matches(matches):
    """Count how often each symbol occurs, grouped by required version.

    *matches* is an iterable of ``(filename, lineno, symbol)`` tuples.
    Entries whose third item is not a Symbol are skipped; they are
    printed as unknown symbols when the global ``options.verbose`` is set.

    Returns a dict mapping ``"<book name>-<since>"`` to a list of
    ``(symbol name, count)`` pairs sorted by descending count.
    """
    tally = {}

    for filename, lineno, symbol in matches:
        if isinstance(symbol, Symbol):
            group = '%s-%s' % (symbol.book.name, symbol.since)
            per_symbol = tally.setdefault(group, {})
            per_symbol[symbol.name] = per_symbol.get(symbol.name, 0) + 1
        elif options.verbose:
            print('%s:%d: unknown symbol %s' % (filename, lineno, symbol))

    return dict(
        (group, sorted(per_symbol.items(), key=lambda pair: -pair[1]))
        for group, per_symbol in tally.items())
if '__main__' == __name__:
    # Command line driver: load the requested devhelp books, scan the given
    # C sources for known symbols and report, per book, the uses of the
    # symbols that demand the newest version.  The exit status is 1 when
    # anything was reported and 0 otherwise.
    options, args = parse_cmdline()

    merge_gnome_path(options)

    if not options.books:
        options.books = default_books

    def trace(message, *args):
        """Print a %-formatted progress message if --verbose was given."""
        if options.verbose:
            print(message % args)

    def parse_book(name):
        """Turn a ``NAME[:VERSION]`` argument into a ``(name, Book)`` pair.

        Returns None, after printing a warning, if the book is not found.
        """
        match = re.match(r'^(.*?)(?::(\d+(?:\.\d+)*))?$', name)
        name, version = match.groups()
        trace('reading book: %s', name)

        version = Symbol.VersionInfo(version) if version else None

        try:
            return name, Book(name, options.dirs, version)
        except IOError as e:
            stderr.write('WARNING: %s.\n' % e)
            return None

    def blank_comment(match):
        """Replacement text for one stripped C comment.

        A space keeps the tokens around the comment apart, and the
        comment's newlines are kept so that the line numbers of the
        following code stay correct.
        """
        return ' ' + '\n' * match.group(0).count('\n')

    def scan_source_file(name):
        """Record every known (and, on request, unknown) symbol in a file."""
        contents = None

        try:
            source = open(name)
            try:
                contents = __comment_regex.sub(blank_comment, source.read())
            finally:
                source.close()
        except IOError as e:
            stderr.write('%s\n' % e)

        if not contents:
            return

        trace('scanning: %s', name)

        # Line numbers are 1-based, as editors and compilers report them.
        for lineno, line in enumerate(contents.split('\n'), 1):
            for word in __word_regex.findall(line):
                symbol = symbols.get(word)

                if symbol:
                    symbol.matches.append((name, lineno, symbol))
                elif options.unknown and word.find('_') > 0:
                    unknown_symbols.append((name, lineno, word))

    unknown_symbols = []
    symbols = dict()
    books = dict(
        entry for entry in map(parse_book, set(options.books)) if entry)

    for book in books.values():
        symbols.update(book.symbols)

    for name in args:
        if os.path.isdir(name):
            for path, dirs, files in walk(name):
                for f in files:
                    if f.endswith('.c'):
                        scan_source_file(os.path.join(path, f))
        else:
            scan_source_file(name)

    matches = []

    for book in books.values():
        requirements = book.find_requirements()

        if requirements:
            # Only the symbols requiring the newest version are reported.
            for symbol in requirements[max(requirements)]:
                matches += symbol.matches

    if options.unknown:
        matches += unknown_symbols

    # Sort by file, line and symbol name; Symbol objects themselves are
    # never compared, which keeps the order deterministic.
    matches.sort(key=lambda m: (m[0], m[1], getattr(m[2], 'name', m[2])))

    if options.summarize:
        summary = summarize_matches(matches)
        for since in sorted(summary.keys()):
            print('%s required for' % since)
            for symbol_name, count in summary[since]:
                print('    %u %s' % (count, symbol_name))
    else:
        for filename, lineno, symbol in matches:
            if isinstance(symbol, Symbol):
                print('%s:%d: %s-%s required for %s' % (
                    filename, lineno,
                    symbol.book.name, symbol.since, symbol.name))
            elif options.verbose:
                print('%s:%d: unknown symbol %s' % (filename, lineno, symbol))

    if options.unknown:
        unknown = sorted(set(
            m[2].split('_')[0].lower() for m in unknown_symbols))

        print('unknown prefixes: %s' % ', '.join(unknown))

    raise SystemExit(1 if matches else 0)