Note when $termprefix and $unprefix were added
[xapian.git] / xapian-bindings / python / doxy2swig.py
blob3f4c24b4bd185bb64c2cde092e61a2267236fa65
1 #!/usr/bin/python
2 """Doxygen XML to SWIG docstring converter.
4 Converts Doxygen generated XML files into a file containing docstrings
5 that can be used by SWIG-1.3.x. Note that you need to get SWIG
6 version > 1.3.23 or use Robin Dunn's docstring patch to be able to use
7 the resulting output.
9 Usage:
11 doxy2swig.py input.xml output.i
13 input.xml is your doxygen generated XML file and output.i is where the
14 output will be written (the file will be clobbered).
16 """
18 # This code is implemented using Mark Pilgrim's code as a guideline:
19 # http://www.faqs.org/docs/diveintopython/kgp_divein.html
21 # Author: Prabhu Ramachandran
22 # License: BSD style
25 from xml.dom import minidom
26 import re
27 import sys
28 import textwrap
29 import types
30 import os.path
31 import io
def my_open_read(source):
    """Return a readable file object for `source`.

    `source` may be an already-open file-like object (anything with a
    `read` attribute), which is returned unchanged, or a filename, which
    is opened for reading.

    The file is opened in binary mode so that the XML parser decodes the
    bytes according to the document's own encoding declaration, instead
    of the text being pre-decoded with the platform's default encoding.
    """
    if hasattr(source, "read"):
        return source
    return open(source, 'rb')
def my_open_write(dest):
    """Return a writable file object for `dest`.

    Anything that already has a `write` attribute is handed back as is;
    otherwise `dest` is treated as a filename and opened for writing as
    UTF-8 text (truncating any existing file).
    """
    if not hasattr(dest, "write"):
        return io.open(dest, 'w', encoding = 'utf8')
    return dest
class Doxy2SWIG:
    """Converts Doxygen generated XML files into a file containing
    docstrings that can be used by SWIG-1.3.x that have support for
    feature("docstring").  Once the data is parsed it is stored in
    self.pieces.

    """

    def __init__(self, src):
        """Initialize the instance given a source object (file or
        filename).

        The source is parsed immediately and then closed, including when
        it was passed in as an already-open file object.

        """
        f = my_open_read(src)
        try:
            # File-like sources (e.g. in-memory streams) may have no
            # `name`, and files opened from a descriptor report the
            # integer descriptor; neither is usable as a path.
            src_name = getattr(f, 'name', '')
            if isinstance(src_name, int):
                src_name = ''
            self.my_dir = os.path.dirname(src_name)
            self.xmldoc = minidom.parse(f).documentElement
        finally:
            # Close the source even if the XML fails to parse.
            f.close()

        self.pieces = []
        self.pieces.append('\n// File: %s\n' % os.path.basename(src_name))

        self.space_re = re.compile(r'\s+')
        self.lead_spc = re.compile(r'^(%feature\S+\s+\S+\s*?)"\s+(\S)')
        # Set to 1 by do_doxygenindex(); write() then skips clean_pieces()
        # because the per-file pieces were already cleaned.
        self.multi = 0
        # Tag names that parse_Element() drops without descending.
        self.ignores = ('inheritancegraph', 'param', 'listofallmembers',
                        'innerclass', 'name', 'declname', 'incdepgraph',
                        'invincdepgraph', 'programlisting', 'type',
                        'references', 'referencedby', 'location',
                        'collaborationgraph', 'reimplements',
                        'reimplementedby', 'derivedcompoundref',
                        'basecompoundref')

    def generate(self):
        """Parses the file set in the initialization.  The resulting
        data is stored in `self.pieces`.

        """
        self.parse(self.xmldoc)

    def parse(self, node):
        """Parse a given node.  This function in turn calls the
        `parse_<nodeType>` functions which handle the respective
        nodes.

        """
        pm = getattr(self, "parse_%s" % node.__class__.__name__)
        pm(node)

    def parse_Document(self, node):
        """Parse a document node by parsing its root element."""
        self.parse(node.documentElement)

    def parse_Text(self, node):
        """Add the text of a text node, escaped and wrapped.

        Backslashes and double quotes are escaped because the text ends
        up inside a double-quoted string in the output.  Text consisting
        only of whitespace is dropped.
        """
        txt = node.data
        txt = txt.replace('\\', r'\\\\')
        txt = txt.replace('"', r'\"')
        # ignore pure whitespace
        m = self.space_re.match(txt)
        if not (m and len(m.group()) == len(txt)):
            self.add_text(textwrap.fill(txt))

    def parse_Comment(self, node):
        """Parse an XML comment node.  Comments produce no output."""
        return

    def parse_Element(self, node):
        """Parse an `ELEMENT_NODE`.  This calls specific
        `do_<tagName>` handlers for different elements.  If no handler
        is available the `generic_parse` method is called.  All
        tagNames specified in `self.ignores` are simply ignored.

        """
        name = node.tagName
        if name in self.ignores:
            return
        handler = getattr(self, "do_%s" % name, None)
        if handler is not None:
            handler(node)
        else:
            self.generic_parse(node)

    def add_text(self, value):
        """Adds text corresponding to `value` into `self.pieces`.

        A list or tuple is added item by item; anything else is
        appended as a single piece.
        """
        if isinstance(value, (list, tuple)):
            self.pieces.extend(value)
        else:
            self.pieces.append(value)

    def get_specific_nodes(self, node, names):
        """Given a node and a sequence of strings in `names`, return a
        dictionary containing the names as keys and child
        `ELEMENT_NODEs`, that have a `tagName` equal to the name.

        If several children share a tag name, the last one wins.

        """
        found = {}
        for child in node.childNodes:
            if child.nodeType == child.ELEMENT_NODE and child.tagName in names:
                found[child.tagName] = child
        return found

    def generic_parse(self, node, pad=0):
        """A Generic parser for arbitrary tags in a node.

        Parameters:

         - node:  A node in the DOM.
         - pad: `int` (default: 0)

           If 0 the node data is not padded with newlines.  If 1 it
           appends a newline after parsing the childNodes.  If 2 it
           pads before and after the nodes are processed.  Defaults to
           0.

        The trailing newline is only added when parsing actually
        produced some output.

        """
        npiece = 0
        if pad:
            npiece = len(self.pieces)
            if pad == 2:
                self.add_text('\n')
        for n in node.childNodes:
            self.parse(n)
        if pad and len(self.pieces) > npiece:
            self.add_text('\n')

    def space_parse(self, node):
        """Emit a single space, then parse the node's children."""
        self.add_text(' ')
        self.generic_parse(node)

    # Inline markup is rendered as its plain content preceded by a space.
    do_ref = space_parse
    do_emphasis = space_parse
    do_bold = space_parse
    do_computeroutput = space_parse
    do_formula = space_parse

    def do_compoundname(self, node):
        """Open the `%feature("docstring")` entry for a compound."""
        self.add_text('\n\n')
        data = node.firstChild.data
        self.add_text('%%feature("docstring") %s "\n' % data)

    def do_compounddef(self, node):
        """Handle a `compounddef` element.

        For a public class or struct, the name and descriptions are
        emitted first as the compound's own docstring, then the remaining
        children are parsed.  For a file or namespace only the
        `sectiondef` descendants are parsed.  Other kinds, and non-public
        classes/structs, produce nothing.
        """
        kind = node.attributes['kind'].value
        if kind in ('class', 'struct'):
            prot = node.attributes['prot'].value
            if prot != 'public':
                return
            names = ('compoundname', 'briefdescription',
                     'detaileddescription', 'includes')
            first = self.get_specific_nodes(node, names)
            for n in names:
                if n in first:
                    self.parse(first[n])
            self.add_text(['";', '\n'])
            # Everything already emitted above must not be parsed again.
            first_values = list(first.values())
            for n in node.childNodes:
                if n not in first_values:
                    self.parse(n)
        elif kind in ('file', 'namespace'):
            for n in node.getElementsByTagName('sectiondef'):
                self.parse(n)

    def do_includes(self, node):
        """Ignore `includes` elements."""
        # Don't display C++ includes - Python programmers don't care
        pass

    def do_parameterlist(self, node):
        """Emit a `Parameters:` heading followed by the list contents."""
        self.add_text(['\n', '\n', 'Parameters:', '\n'])
        self.generic_parse(node, pad=1)

    def do_para(self, node):
        """Emit a paragraph, preceded by a newline."""
        self.add_text('\n')
        self.generic_parse(node, pad=1)

    def do_parametername(self, node):
        """Emit a parameter name on its own line, followed by `: `."""
        self.add_text('\n')
        self.generic_parse(node, pad=0)
        self.add_text(": ")

    def do_parameterdefinition(self, node):
        """Emit a parameter's description."""
        self.generic_parse(node, pad=1)

    def do_detaileddescription(self, node):
        """Emit a detailed description."""
        self.generic_parse(node, pad=1)

    def do_briefdescription(self, node):
        """Emit a brief description."""
        self.generic_parse(node, pad=1)

    def do_memberdef(self, node):
        """Emit the docstring entry for a public member.

        Non-public members and members whose name starts with
        `operator` are skipped.  The entry is named `Scope::name`, where
        the scope comes from the enclosing compound, and contains the
        brief description (if any), the definition text and the output
        of the member's remaining children.
        """
        prot = node.attributes['prot'].value
        if prot != 'public':
            return

        # memberdef -> sectiondef -> compounddef -> document root
        tmp = node.parentNode.parentNode.parentNode
        compdef = tmp.getElementsByTagName('compounddef')[0]
        cdef_kind = compdef.attributes['kind'].value

        first = self.get_specific_nodes(
            node, ('briefdescription', 'definition', 'name'))
        name = first['name'].firstChild.data
        if name[:8] == 'operator':  # Don't handle operators yet.
            return

        # None (not '') when absent: parse() can only handle DOM nodes.
        briefdesc = first.get('briefdescription')
        defn = first['definition'].firstChild.data

        self.add_text('\n')
        self.add_text('%feature("docstring") ')

        anc = node.parentNode.parentNode
        if cdef_kind in ('file', 'namespace'):
            ns_node = anc.getElementsByTagName('innernamespace')
            if not ns_node and cdef_kind == 'namespace':
                ns_node = anc.getElementsByTagName('compoundname')
            if ns_node:
                ns = ns_node[0].firstChild.data
                self.add_text(' %s::%s "\n' % (ns, name))
            else:
                self.add_text(' %s "\n' % (name))
        elif cdef_kind in ('class', 'struct'):
            # Get the full function name.
            anc_node = anc.getElementsByTagName('compoundname')
            cname = anc_node[0].firstChild.data
            self.add_text(' %s::%s "\n' % (cname, name))

        if briefdesc is not None:
            self.parse(briefdesc)
        self.add_text(defn)

        # The nodes handled explicitly above must not be parsed again.
        first_values = list(first.values())
        for n in node.childNodes:
            if n not in first_values:
                self.parse(n)
        self.add_text(['";', '\n'])

    def do_definition(self, node):
        """Open a docstring entry named after, and starting with, the
        definition text."""
        data = node.firstChild.data
        self.add_text('%s "\n%s' % (data, data))

    def do_sectiondef(self, node):
        """Parse only sections of kind `public-func` or `func`."""
        kind = node.attributes['kind'].value
        if kind in ('public-func', 'func'):
            self.generic_parse(node)

    def do_simplesect(self, node):
        """Handle a `simplesect` according to its kind.

        `date`, `rcs` and `version` sections are dropped; `warning` and
        `see` sections get a `WARNING: ` / `See: ` prefix; all others are
        parsed as is.
        """
        kind = node.attributes['kind'].value
        if kind in ('date', 'rcs', 'version'):
            return
        if kind == 'warning':
            self.add_text(['\n', 'WARNING: '])
        elif kind == 'see':
            self.add_text('\n')
            self.add_text('See: ')
        self.generic_parse(node)

    def do_argsstring(self, node):
        """Emit the argument string of a member."""
        self.generic_parse(node, pad=1)

    def do_member(self, node):
        """Parse `member` entries that are functions whose refid starts
        with `namespace`; ignore all others."""
        kind = node.attributes['kind'].value
        refid = node.attributes['refid'].value
        if kind == 'function' and refid[:9] == 'namespace':
            self.generic_parse(node)

    def do_doxygenindex(self, node):
        """Process an index file by converting every compound it lists.

        Each `<refid>.xml` is looked up relative to the current directory
        first and then next to the index file.  The cleaned pieces of
        every file are appended to `self.pieces`.
        """
        self.multi = 1
        comps = node.getElementsByTagName('compound')
        for c in comps:
            refid = c.attributes['refid'].value
            fname = refid + '.xml'
            if not os.path.exists(fname):
                fname = os.path.join(self.my_dir, fname)
            print("parsing file: %s" % fname)
            p = Doxy2SWIG(fname)
            p.generate()
            self.pieces.extend(self.clean_pieces(p.pieces))

    def write(self, fname):
        """Write the collected pieces to `fname` (file or filename) and
        close it."""
        o = my_open_write(fname)
        try:
            if self.multi:
                # Pieces from an index were cleaned per file already.
                o.write("".join(self.pieces))
            else:
                o.write("".join(self.clean_pieces(self.pieces)))
        finally:
            # Close the destination even if writing fails.
            o.close()

    def clean_pieces(self, pieces):
        """Cleans the list of strings given as `pieces`.  It replaces
        multiple newlines by a maximum of 2 and returns a new list.
        It also wraps the paragraphs nicely.

        """
        ret = []
        count = 0  # number of consecutive '\n' pieces seen so far
        for i in pieces:
            if i == '\n':
                count = count + 1
            else:
                if i == '";':
                    # At most one newline before a closing quote.
                    if count:
                        ret.append('\n')
                elif count > 2:
                    ret.append('\n\n')
                elif count:
                    ret.append('\n'*count)
                count = 0
                ret.append(i)

        _data = "".join(ret)
        ret = []
        for i in _data.split('\n\n'):
            if i == 'Parameters:':
                ret.extend(['Parameters:\n-----------', '\n\n'])
            elif i.find('// File:') > -1:  # leave comments alone.
                ret.extend([i, '\n'])
            else:
                _tmp = textwrap.fill(i.strip(), break_long_words=False)
                # Remove whitespace between the opening quote of a
                # %feature line and the first word of its text.
                _tmp = self.lead_spc.sub(r'\1"\2', _tmp)
                ret.extend([_tmp, '\n\n'])
        return ret
def main(input, output):
    """Convert the Doxygen XML in `input` and write the result to `output`.

    Both arguments may be filenames or already-open file objects.
    """
    converter = Doxy2SWIG(input)
    converter.generate()
    converter.write(output)
if __name__ == '__main__':
    # Exactly two arguments are required: the input XML file and the
    # output interface file.  Anything else prints the usage text and
    # exits with status 1.
    if len(sys.argv) == 3:
        main(sys.argv[1], sys.argv[2])
    else:
        print(__doc__)
        sys.exit(1)