2 """Doxygen XML to SWIG docstring converter.
4 Converts Doxygen generated XML files into a file containing docstrings
5 that can be used by SWIG-1.3.x. Note that you need to get SWIG
6 version > 1.3.23 or use Robin Dunn's docstring patch to be able to use
11 doxy2swig.py input.xml output.i
13 input.xml is your doxygen generated XML file and output.i is where the
14 output will be written (the file will be clobbered).
18 # This code is implemented using Mark Pilgrim's code as a guideline:
19 # http://www.faqs.org/docs/diveintopython/kgp_divein.html
21 # Author: Prabhu Ramachandran
25 from xml
.dom
import minidom
33 def my_open_read(source
):
34 if hasattr(source
, "read"):
39 def my_open_write(dest
):
40 if hasattr(dest
, "write"):
43 return io
.open(dest
, 'w', encoding
= 'utf8')
47 """Converts Doxygen generated XML files into a file containing
48 docstrings that can be used by SWIG-1.3.x that have support for
49 feature("docstring"). Once the data is parsed it is stored in
54 def __init__(self
, src
):
55 """Initialize the instance given a source object (file or
60 self
.my_dir
= os
.path
.dirname(f
.name
)
61 self
.xmldoc
= minidom
.parse(f
).documentElement
65 self
.pieces
.append('\n// File: %s\n'%\
66 os
.path
.basename(f
.name
))
68 self
.space_re
= re
.compile(r
'\s+')
69 self
.lead_spc
= re
.compile(r
'^(%feature\S+\s+\S+\s*?)"\s+(\S)')
71 self
.ignores
= ('inheritancegraph', 'param', 'listofallmembers',
72 'innerclass', 'name', 'declname', 'incdepgraph',
73 'invincdepgraph', 'programlisting', 'type',
74 'references', 'referencedby', 'location',
75 'collaborationgraph', 'reimplements',
76 'reimplementedby', 'derivedcompoundref',
81 """Parses the file set in the initialization. The resulting
82 data is stored in `self.pieces`.
85 self
.parse(self
.xmldoc
)
87 def parse(self
, node
):
88 """Parse a given node. This function in turn calls the
89 `parse_<nodeType>` functions which handle the respective
93 pm
= getattr(self
, "parse_%s"%node
.__class
__.__name
__)
def parse_Document(self, node):
    """Handle a document node by parsing its root element.

    The node's `documentElement` is handed to `self.parse`; nothing
    is returned.
    """
    root = node.documentElement
    self.parse(root)
99 def parse_Text(self
, node
):
101 txt
= txt
.replace('\\', r
'\\\\')
102 txt
= txt
.replace('"', r
'\"')
103 # ignore pure whitespace
104 m
= self
.space_re
.match(txt
)
105 if m
and len(m
.group()) == len(txt
):
108 self
.add_text(textwrap
.fill(txt
))
110 def parse_Element(self
, node
):
111 """Parse an `ELEMENT_NODE`. This calls specific
`do_<tagName>` handlers for different elements.  If no handler
113 is available the `generic_parse` method is called. All
114 tagNames specified in `self.ignores` are simply ignored.
118 ignores
= self
.ignores
121 attr
= "do_%s" % name
122 if hasattr(self
, attr
):
123 handlerMethod
= getattr(self
, attr
)
126 self
.generic_parse(node
)
127 #if name not in self.generics: self.generics.append(name)
def add_text(self, value):
    """Adds text corresponding to `value` into `self.pieces`.

    Parameters:

     - value: a single piece of text, or a list/tuple of pieces.

    A list or tuple (including subclasses of either) is spliced into
    `self.pieces` item by item; any other value is appended as one
    item.
    """
    # isinstance() rather than an exact type() match, so that
    # list/tuple subclasses are spliced instead of being appended as
    # a single nested sequence.
    if isinstance(value, (list, tuple)):
        self.pieces.extend(value)
    else:
        self.pieces.append(value)
136 def get_specific_nodes(self
, node
, names
):
137 """Given a node and a sequence of strings in `names`, return a
138 dictionary containing the names as keys and child
139 `ELEMENT_NODEs`, that have a `tagName` equal to the name.
142 nodes
= [(x
.tagName
, x
) for x
in node
.childNodes \
143 if x
.nodeType
== x
.ELEMENT_NODE
and \
147 def generic_parse(self
, node
, pad
=0):
148 """A Generic parser for arbitrary tags in a node.
152 - node: A node in the DOM.
153 - pad: `int` (default: 0)
155 If 0 the node data is not padded with newlines. If 1 it
156 appends a newline after parsing the childNodes. If 2 it
157 pads before and after the nodes are processed. Defaults to
163 npiece
= len(self
.pieces
)
166 for n
in node
.childNodes
:
169 if len(self
.pieces
) > npiece
:
172 def space_parse(self
, node
):
174 self
.generic_parse(node
)
# Bind `space_parse` under the `do_<tagName>` handler names for the
# emphasis, bold, computeroutput and formula tags.
do_emphasis = space_parse
do_bold = space_parse
do_computeroutput = space_parse
do_formula = space_parse
def do_compoundname(self, node):
    """Start the docstring directive for a compound.

    Adds a two-newline separator, then the line
    `%feature("docstring") <name> "`, where `<name>` is the data of
    the node's first child.
    """
    self.add_text('\n\n')
    compound = node.firstChild.data
    directive = '%%feature("docstring") %s "\n' % compound
    self.add_text(directive)
187 def do_compounddef(self
, node
):
188 kind
= node
.attributes
['kind'].value
189 if kind
in ('class', 'struct'):
190 prot
= node
.attributes
['prot'].value
193 names
= ('compoundname', 'briefdescription',
194 'detaileddescription', 'includes')
195 first
= self
.get_specific_nodes(node
, names
)
199 self
.add_text(['";','\n'])
200 for n
in node
.childNodes
:
201 first_values
= list(first
.values())
202 if n
not in first_values
:
204 elif kind
in ('file', 'namespace'):
205 nodes
= node
.getElementsByTagName('sectiondef')
209 def do_includes(self
, node
):
210 # Don't display C++ includes - Python programmers don't care
211 #self.add_text('C++ includes: ')
212 #self.generic_parse(node, pad=1)
def do_parameterlist(self, node):
    """Emit a `Parameters:` heading and then parse the node.

    The heading is added as separate pieces (two newlines, the
    heading text, one newline) before the node is passed to
    `generic_parse` with `pad=1`.
    """
    heading = ['\n', '\n', 'Parameters:', '\n']
    self.add_text(heading)
    self.generic_parse(node, pad=1)
219 def do_para(self
, node
):
221 self
.generic_parse(node
, pad
=1)
223 def do_parametername(self
, node
):
225 self
.generic_parse(node
, pad
=0)
def do_parameterdefinition(self, node):
    """Parse a `parameterdefinition` node via `generic_parse`.

    The node is forwarded unchanged with `pad=1`.
    """
    parse_children = self.generic_parse
    parse_children(node, pad=1)
def do_detaileddescription(self, node):
    """Parse a `detaileddescription` node via `generic_parse`.

    The node is forwarded unchanged with `pad=1`.
    """
    parse_children = self.generic_parse
    parse_children(node, pad=1)
def do_briefdescription(self, node):
    """Parse a `briefdescription` node via `generic_parse`.

    The node is forwarded unchanged with `pad=1`.
    """
    parse_children = self.generic_parse
    parse_children(node, pad=1)
237 def do_memberdef(self
, node
):
238 prot
= node
.attributes
['prot'].value
239 id = node
.attributes
['id'].value
240 kind
= node
.attributes
['kind'].value
241 tmp
= node
.parentNode
.parentNode
.parentNode
242 compdef
= tmp
.getElementsByTagName('compounddef')[0]
243 cdef_kind
= compdef
.attributes
['kind'].value
246 first
= self
.get_specific_nodes(node
, ('briefdescription', 'definition', 'name'))
247 name
= first
['name'].firstChild
.data
248 if name
[:8] == 'operator': # Don't handle operators yet.
252 if 'briefdescription' in first
:
253 briefdesc
= first
['briefdescription']
254 defn
= first
['definition'].firstChild
.data
258 self
.add_text('%feature("docstring") ')
260 anc
= node
.parentNode
.parentNode
261 if cdef_kind
in ('file', 'namespace'):
262 ns_node
= anc
.getElementsByTagName('innernamespace')
263 if not ns_node
and cdef_kind
== 'namespace':
264 ns_node
= anc
.getElementsByTagName('compoundname')
266 ns
= ns_node
[0].firstChild
.data
267 self
.add_text(' %s::%s "\n'%(ns
, name
))
269 self
.add_text(' %s "\n'%(name))
270 elif cdef_kind
in ('class', 'struct'):
271 # Get the full function name.
272 anc_node
= anc
.getElementsByTagName('compoundname')
273 cname
= anc_node
[0].firstChild
.data
274 self
.add_text(' %s::%s "\n'%(cname
, name
))
275 if briefdesc
is not None:
276 self
.parse(briefdesc
)
279 for n
in node
.childNodes
:
280 first_values
= list(first
.values())
281 if n
not in first_values
:
283 self
.add_text(['";', '\n'])
def do_definition(self, node):
    """Emit the definition text twice around an opening quote.

    The data of the node's first child is written, followed by ` "`
    and a newline, followed by the same data again, as one piece.
    """
    signature = node.firstChild.data
    piece = '%s "\n%s' % (signature, signature)
    self.add_text(piece)
def do_sectiondef(self, node):
    """Parse a section only if it holds functions.

    Sections whose `kind` attribute is `public-func` or `func` are
    passed to `generic_parse`; every other kind is skipped.
    """
    section_kind = node.attributes['kind'].value
    if section_kind not in ('public-func', 'func'):
        return
    self.generic_parse(node)
294 def do_simplesect(self
, node
):
295 kind
= node
.attributes
['kind'].value
296 if kind
in ('date', 'rcs', 'version'):
298 elif kind
== 'warning':
299 self
.add_text(['\n', 'WARNING: '])
300 self
.generic_parse(node
)
303 self
.add_text('See: ')
304 self
.generic_parse(node
)
306 self
.generic_parse(node
)
def do_argsstring(self, node):
    """Parse an `argsstring` node via `generic_parse`.

    The node is forwarded unchanged with `pad=1`.
    """
    parse_children = self.generic_parse
    parse_children(node, pad=1)
def do_member(self, node):
    """Parse a member entry only for namespace-level functions.

    The node is passed to `generic_parse` when its `kind` attribute
    is `function` and its `refid` attribute begins with `namespace`;
    otherwise nothing happens.
    """
    member_kind = node.attributes['kind'].value
    ref = node.attributes['refid'].value
    if member_kind != 'function' or not ref.startswith('namespace'):
        return
    self.generic_parse(node)
317 def do_doxygenindex(self
, node
):
319 comps
= node
.getElementsByTagName('compound')
321 refid
= c
.attributes
['refid'].value
322 fname
= refid
+ '.xml'
323 if not os
.path
.exists(fname
):
324 fname
= os
.path
.join(self
.my_dir
, fname
)
325 print("parsing file: %s" % fname
)
328 self
.pieces
.extend(self
.clean_pieces(p
.pieces
))
330 def write(self
, fname
):
331 o
= my_open_write(fname
)
333 o
.write("".join(self
.pieces
))
335 o
.write("".join(self
.clean_pieces(self
.pieces
)))
338 def clean_pieces(self
, pieces
):
339 """Cleans the list of strings given as `pieces`. It replaces
340 multiple newlines by a maximum of 2 and returns a new list.
341 It also wraps the paragraphs nicely.
356 ret
.append('\n'*count
)
362 for i
in _data
.split('\n\n'):
363 if i
== 'Parameters:':
364 ret
.extend(['Parameters:\n-----------', '\n\n'])
365 elif i
.find('// File:') > -1: # leave comments alone.
366 ret
.extend([i
, '\n'])
368 _tmp
= textwrap
.fill(i
.strip(), break_long_words
=False)
369 _tmp
= self
.lead_spc
.sub(r
'\1"\2', _tmp
)
370 ret
.extend([_tmp
, '\n\n'])
374 def main(input, output
):
380 if __name__
== '__main__':
381 if len(sys
.argv
) != 3:
384 main(sys
.argv
[1], sys
.argv
[2])