mkdb: add an idea for rewriting path of system entities in the doctype_header
[gtk-doc.git] / gtkdoc-depscan.in
blob5120bb7b120c359cce9bc73b0bf1918d7d2574c6
1 #!@PYTHON@
3 import gzip, os.path, re
5 from os import environ, popen, walk
6 from optparse import OptionParser
7 from sys import stderr
9 from xml.sax import ContentHandler, make_parser
10 from xml.sax.handler import feature_external_ges
# Books consulted when the command line names none via -b/--book.
default_books = [
    'atk',
    'gdk',
    'gdk-pixbuf',
    'glib',
    'gio',
    'gobject',
    'gtk',
    'pango',
]

# A C block comment; DOTALL lets one match run across several lines.
__comment_regex = re.compile(r'/\*.*?\*/', flags=re.DOTALL)

# A C identifier: a letter or underscore followed by letters, digits, underscores.
__word_regex = re.compile(r'\b[A-Za-z_][A-Za-z0-9_]*\b')
class Book(object):
    """A devhelp book located on disk, with its symbols loaded on demand.

    The catalog file (``<name>.devhelp2`` or ``<name>.devhelp2.gz``) is only
    parsed when ``symbols`` or ``title`` is first read, and parsed again when
    the file's modification time has advanced since the last parse.
    """

    def __init__(self, name, folders, version=None):
        """Locate the catalog for the book called *name*.

        *folders* is searched in order for ``<folder>/<name>/<name>.devhelp2``
        and its ``.gz`` variant.  The first folder holding either file wins;
        if it holds both, the more recently modified one is used.

        *version* is stored as-is and used by find_requirements() as the
        version the scanned code already depends on (None: no baseline).

        Raises IOError when no folder contains a catalog.
        """
        self.__catalog   = None
        self.__name      = name
        self.__symbols   = None
        self.__timestamp = 0
        self.__title     = None
        self.__version   = version

        for folder in folders:
            candidates = [
                os.path.join(folder, name, pattern % name)
                for pattern in ('%s.devhelp2', '%s.devhelp2.gz')]
            found = sorted(
                (os.path.getmtime(path), path)
                for path in candidates if os.path.isfile(path))

            if found:
                # Sorted by modification time, so the last entry is the newest.
                self.__catalog = found[-1][1]
                break

        if not self.__catalog:
            raise IOError('No devhelp book found for "%s"' % name)

    def __cmp__(self, other):
        """Python 2 ordering hook: books order by name.

        Any object that is not a Book compares as equal (0), as before.
        """
        if isinstance(other, Book):
            return (self.name > other.name) - (self.name < other.name)

        return 0

    def __repr__(self):
        return '<Book name="%s">' % self.__name

    def parse(self):
        """(Re)load title and symbols from the catalog if it is new or changed.

        The cached state is replaced only after the whole file parsed
        successfully, so a failing parse never leaves a partial symbol table
        behind.
        """
        timestamp = os.path.getmtime(self.__catalog)

        # Compare against None: a book without any keyword legitimately
        # yields an empty dict, which must still count as "already parsed".
        if self.__symbols is not None and timestamp <= self.__timestamp:
            return

        class DevhelpContentHandler(ContentHandler):
            """Collects the book title and one Symbol per <keyword> element."""

            def __init__(self, book, symbols):
                ContentHandler.__init__(self)
                # Stays None when the document has no <book> element.
                self.title = None
                self._book = book
                self._symbols = symbols

            def startElement(self, name, attrs):
                if 'book' == name:
                    self.title = attrs.get('title')
                elif 'keyword' == name:
                    symbol = Symbol.from_xml(self._book, attrs)
                    if symbol:
                        self._symbols[symbol.name] = symbol

        symbols = dict()
        handler = DevhelpContentHandler(self, symbols)

        parser = make_parser()
        # Do not fetch external entities referenced by the catalog.
        parser.setFeature(feature_external_ges, False)
        parser.setContentHandler(handler)

        if self.__catalog.endswith('.gz'):
            stream = gzip.open(self.__catalog)
        else:
            stream = open(self.__catalog, 'rb')

        try:
            parser.parse(stream)
        finally:
            stream.close()

        self.__symbols, self.__timestamp = symbols, timestamp
        self.__title = handler.title

    def _get_symbols(self):
        self.parse()
        return self.__symbols

    def _get_title(self):
        self.parse()
        return self.__title

    def find_requirements(self):
        """Group the matched symbols that need more than the book's version.

        Returns a dict mapping each ``since`` value to the list of symbols
        that have at least one match and whose ``since`` is greater than
        ``self.version``.  Without a baseline version every matched symbol
        carrying a ``since`` value is included.
        """
        requirements = dict()

        for symbol in self.symbols.values():
            if not symbol.matches or not symbol.since:
                continue

            if self.version is None or symbol.since > self.version:
                requirements.setdefault(symbol.since, []).append(symbol)

        return requirements

    catalog = property(lambda self: self.__catalog)
    name    = property(lambda self: self.__name)
    version = property(lambda self: self.__version)

    symbols = property(_get_symbols)
    title   = property(_get_title)
class Symbol(object):
    """One documented symbol of a book, built from a keyword's attributes.

    ``matches`` starts out empty; callers append to it when they find the
    symbol being used.
    """

    known_attributes = ('name', 'type', 'link', 'deprecated', 'since')

    @staticmethod
    def _cmp(a, b):
        """Three-way comparison (-1, 0, 1), replacing the Python 2 builtin cmp()."""
        return (a > b) - (a < b)

    class DeprecationInfo(object):
        """Deprecation note of a symbol: an optional version plus free text."""

        def __init__(self, text):
            """Split *text* of the form ``<version>: <details>``.

            When there is no colon, or the part before the first colon holds
            no version number, the whole text is kept as details and
            ``version`` is None.
            """
            self.__version = None
            self.__details = text.strip()

            if ':' in text:
                prefix, details = text.split(':', 1)

                try:
                    self.__version = Symbol.VersionInfo(prefix)
                except ValueError:
                    # A free-form note that merely contains a colon.
                    pass
                else:
                    self.__details = details.strip()

        def __cmp__(self, other):
            """Python 2 ordering hook: compare by deprecation version."""
            if isinstance(other, Symbol.DeprecationInfo):
                return Symbol._cmp(self.version, other.version)

            if isinstance(other, Symbol.VersionInfo):
                return Symbol._cmp(self.version, other)

            return 1

        def __str__(self):
            if not self.__version:
                return self.__details and str(self.__details) or 'Deprecated'

            if self.__details:
                return 'Since %s: %s' % (self.__version, self.__details)

            return 'Since %s' % self.__version

        details = property(lambda self: self.__details)
        version = property(lambda self: self.__version)

    class VersionInfo(object):
        """A dotted version number such as ``2.10`` that can be ordered and hashed."""

        def __init__(self, text):
            """Parse the dotted number at the start of *text*.

            One leading word and whitespace are skipped, so ``Since 2.4``
            parses like ``2.4``.

            Raises ValueError when *text* holds no such number.
            """
            match = re.match(r'^\w*\s*((?:\d+\.)*\d+)', text)

            if match is None:
                raise ValueError('No version number found in "%s"' % text)

            self.__numbers = [int(part) for part in match.group(1).split('.')]

            # Fold the components, last one first, into a single integer.
            digest = 0
            for number in reversed(self.__numbers):
                digest = digest * 1000 + number

            self.__hash = digest
            self.__text = text.strip()

        def __get_number(self, index):
            if len(self.__numbers) > index:
                return self.__numbers[index]

            return 0

        def __cmp__(self, other):
            """Order by component list; greater than anything that is no version."""
            if isinstance(other, Symbol.VersionInfo):
                return Symbol._cmp(self.numbers, other.numbers)

            return 1

        # Rich comparisons mirror __cmp__ so that ordering also works on
        # interpreters that never call __cmp__.
        def __eq__(self, other):
            return self.__cmp__(other) == 0

        def __ne__(self, other):
            return self.__cmp__(other) != 0

        def __lt__(self, other):
            return self.__cmp__(other) < 0

        def __le__(self, other):
            return self.__cmp__(other) <= 0

        def __gt__(self, other):
            return self.__cmp__(other) > 0

        def __ge__(self, other):
            return self.__cmp__(other) >= 0

        def __hash__(self):
            return self.__hash

        def __repr__(self):
            return '.'.join(map(str, self.__numbers))

        major   = property(lambda self: self.__get_number(0))
        minor   = property(lambda self: self.__get_number(1))
        patch   = property(lambda self: self.__get_number(2))
        numbers = property(lambda self: self.__numbers)
        text    = property(lambda self: self.__text)

    @classmethod
    def from_xml(cls, book, attrs):
        """Build a Symbol from keyword attributes, or return None.

        *attrs* only needs a ``get()`` method; the keys listed in
        ``known_attributes`` are read.  The symbol type is taken from the
        ``type`` attribute or derived from the name:

        * a trailing ``()`` marks a function (lower-case first letter) or a
          macro, unless the type already says so;
        * a leading ``enum``/``struct``/``union`` word supplies a missing type;
        * ``The "x" property`` / ``The "x" signal`` become ``Owner:x`` and
          ``Owner::x``, the owner being the link anchor up to its first ``-``.

        None is returned when the name is missing or no type could be
        determined.  A ``since`` value holding no version number is reported
        on stderr and dropped.
        """
        name, kind, link, deprecated, since = [
            attrs.get(key) for key in Symbol.known_attributes]

        if name is None:
            return None

        name = name.strip()

        if name.endswith('()'):
            if kind not in ('function', 'macro'):
                kind = 'function' if name[0].islower() else 'macro'

            name = name[:-2].strip()

        words = name.split(' ')

        if len(words) > 1:
            if words[0] in ('enum', 'struct', 'union'):
                if not kind:
                    kind = words[0]
                name = name[len(words[0]):].strip()
            elif words[-1] in ('property', 'signal'):
                assert 'The' == words[0]
                owner = link.split('#', 1)[1].split('-', 1)[0]
                member = name.split('"')[1]
                kind = words[-1]

                # GObject notation: one colon for properties, two for signals.
                if 'property' == kind:
                    name = '%s:%s' % (owner, member)
                else:
                    name = '%s::%s' % (owner, member)

        if not kind:
            return None

        if deprecated is not None:
            deprecated = Symbol.DeprecationInfo(deprecated)

        if since:
            try:
                since = Symbol.VersionInfo(since)
            except ValueError:
                # One odd annotation must not abort loading the whole book.
                stderr.write(
                    'WARNING: Unparsable version "%s" for symbol "%s" in %s.\n' % (
                    since, name, book.name))
                since = None

        if name.count(' '):
            stderr.write(
                'WARNING: Malformed symbol name: "%s" (type=%s) in %s.\n' % (
                name, kind, book.name))

        return Symbol(book, name, kind, link, deprecated, since)

    def __init__(self, book, name, type, link=None, deprecated=None, since=None):
        self.__book       = book
        self.__name       = name
        self.__type       = type
        self.__link       = link
        self.__deprecated = deprecated
        self.__since      = since
        self.__matches    = []

    def __repr__(self):
        return (
            '<Symbol: %s, type=%s, since=%s, deprecated=%s>' % (
            self.name, self.type, self.since, self.deprecated))

    book       = property(lambda self: self.__book)
    name       = property(lambda self: self.__name)
    type       = property(lambda self: self.__type)
    link       = property(lambda self: self.__link)
    deprecated = property(lambda self: self.__deprecated)
    matches    = property(lambda self: self.__matches)
    since      = property(lambda self: self.__since)
def parse_cmdline():
    """Parse the process command line.

    Returns the ``(options, args)`` pair produced by OptionParser: ``options``
    carries ``books``, ``dirs``, ``unknown`` and ``verbose``; ``args`` holds
    the remaining positional arguments.
    """
    parser = OptionParser(version="@VERSION@")

    # (flags, keyword settings) for every option, in help-output order.
    option_table = (
        (('-b', '--book'), dict(
            dest='books', action='append', default=[],
            help='name of a devhelp book to consider')),
        (('-d', '--html-dir'), dict(
            dest='dirs', action='append', default=[], metavar='PATH',
            help='path of additional folders with devhelp books')),
        (('-u', '--list-unknown'), dict(
            dest='unknown', action='store_true', default=False,
            help='list symbols not found in any book')),
        (('-v', '--verbose'), dict(
            action='store_true', default=False,
            help='print additional information')),
    )

    for flags, settings in option_table:
        parser.add_option(*flags, **settings)

    return parser.parse_args()
def merge_gnome_path(options):
    """Append the existing documentation folders of known prefixes to options.dirs.

    The prefixes are, in order, the glib-2.0 prefix reported by pkg-config
    followed by the colon-separated entries of the GNOME2_PATH environment
    variable.  Blank entries are ignored.  For every prefix the folders
    ``share/devhelp/books`` and ``share/gtk-doc/html`` are added when they
    exist.  Nothing is added, and nothing fails, when no prefix is known.
    """
    search_path = environ.get('GNOME2_PATH')
    prefixes = search_path.split(':') if search_path else []

    pipe = popen('pkg-config --variable=prefix glib-2.0')
    try:
        # Yields an empty string when pkg-config printed nothing on stdout.
        glib_prefix = pipe.readline().rstrip()
    finally:
        pipe.close()

    prefixes.insert(0, glib_prefix)
    prefixes = [prefix.strip() for prefix in prefixes]

    subfolders = (
        ('share', 'devhelp', 'books'),
        ('share', 'gtk-doc', 'html'))

    candidates = [
        os.path.join(prefix, *parts)
        for prefix in prefixes if prefix
        for parts in subfolders]

    options.dirs += [folder for folder in candidates if os.path.isdir(folder)]
if '__main__' == __name__:
    # Command-line driver: load the requested books, scan the given files and
    # folders for uses of their symbols, then report, per book, the uses of
    # the symbols that demand the highest version.  Exits with status 1 when
    # anything was reported and 0 otherwise.
    options, args = parse_cmdline()

    merge_gnome_path(options)

    if not options.books:
        options.books = default_books

    def trace(message, *args):
        """Print a %-formatted progress message when --verbose was given."""
        if options.verbose:
            print(message % args)

    def parse_book(name):
        """Turn a ``name[:version]`` spec into a ``(name, Book)`` pair.

        Returns None, after a warning on stderr, when no catalog is found.
        """
        match = re.match(r'^(.*?)(?::(\d+(?:\.\d+)*))?$', name)
        name, version = match.groups()
        trace('reading book: %s', name)

        version = version and Symbol.VersionInfo(version)

        try:
            return name, Book(name, options.dirs, version)
        except IOError as e:
            stderr.write('WARNING: %s.\n' % e)
            return None

    def blank_comment(match):
        """Replace a comment by the newlines it spans, keeping line numbers."""
        return '\n' * match.group(0).count('\n')

    def scan_source_file(name):
        """Record every known (and optionally unknown) identifier used in *name*.

        Matches are stored as ``(file name, 1-based line number, symbol)``.
        A file that cannot be read is reported on stderr and skipped.
        """
        try:
            with open(name) as source:
                contents = __comment_regex.sub(blank_comment, source.read())
        except IOError as e:
            stderr.write('%s\n' % e)
            return

        if not contents:
            return

        trace('scanning: %s', name)

        for lineno, line in enumerate(contents.split('\n'), 1):
            for word in __word_regex.findall(line):
                symbol = symbols.get(word)

                if symbol:
                    symbol.matches.append((name, lineno, symbol))
                elif options.unknown and word.find('_') > 0:
                    # Only words with an underscore past the first character
                    # are recorded as unknown symbols.
                    unknown_symbols.append((name, lineno, word))

    unknown_symbols = []
    symbols = dict()
    books = dict(
        entry for entry in map(parse_book, set(options.books)) if entry)

    for book in books.values():
        symbols.update(book.symbols)

    for name in args:
        if os.path.isdir(name):
            for path, dirs, files in walk(name):
                for f in files:
                    if f.endswith('.c'):
                        scan_source_file(os.path.join(path, f))
        else:
            scan_source_file(name)

    matches = []

    for book in books.values():
        requirements = book.find_requirements()

        if requirements:
            # Only the symbols demanding the highest version are reported.
            for symbol in requirements[max(requirements)]:
                matches += symbol.matches

    if options.unknown:
        matches += unknown_symbols

    # The third field is a Symbol or a plain word; sort on its text so that
    # entries on the same line never compare objects of different types.
    matches.sort(key=lambda m: (m[0], m[1], getattr(m[2], 'name', m[2])))

    for filename, lineno, symbol in matches:
        if isinstance(symbol, Symbol):
            print('%s:%d: %s-%s required for %s' % (
                filename, lineno, symbol.book.name, symbol.since, symbol.name))
        elif options.verbose:
            print('%s:%d: unknown symbol %s' % (filename, lineno, symbol))

    if options.unknown:
        prefixes = sorted(set(
            word.split('_')[0].lower() for _, _, word in unknown_symbols))

        print('unknown prefixes: %s' % ', '.join(prefixes))

    raise SystemExit(1 if matches else 0)