xapian-bindings/python/doxy2swig.py

   1 #!/usr/bin/python
   2 """Doxygen XML to SWIG docstring converter.
   3
   4 Converts Doxygen generated XML files into a file containing docstrings
   5 that can be used by SWIG-1.3.x.  Note that you need to get SWIG
   6 version > 1.3.23 or use Robin Dunn's docstring patch to be able to use
   7 the resulting output.
   8
   9 Usage:
  10
  11   doxy2swig.py input.xml output.i
  12
  13 input.xml is your doxygen generated XML file and output.i is where the
  14 output will be written (the file will be clobbered).
  15
  16 """
  17
  18 # This code is implemented using Mark Pilgrim's code as a guideline:
  19 #   http://www.faqs.org/docs/diveintopython/kgp_divein.html
  20 #
  21 # Author: Prabhu Ramachandran
  22 # License: BSD style
  23
  24
  25 from xml.dom import minidom
  26 import re
  27 import sys
  28 import textwrap
  29 import types
  30 import os.path
  31 import io
  32
  33 def my_open_read(source):
  34     if hasattr(source, "read"):
  35         return source
  36     else:
  37         return open(source)
  38
  39 def my_open_write(dest):
  40     if hasattr(dest, "write"):
  41         return dest
  42     else:
  43         return io.open(dest, 'w', encoding = 'utf8')
  44
  45
  46 class Doxy2SWIG:
  47     """Converts Doxygen generated XML files into a file containing
  48     docstrings that can be used by SWIG-1.3.x that have support for
  49     feature("docstring").  Once the data is parsed it is stored in
  50     self.pieces.
  51
  52     """
  53
  54     def __init__(self, src):
  55         """Initialize the instance given a source object (file or
  56         filename).
  57
  58         """
  59         f = my_open_read(src)
  60         self.my_dir = os.path.dirname(f.name)
  61         self.xmldoc = minidom.parse(f).documentElement
  62         f.close()
  63
  64         self.pieces = []
  65         self.pieces.append('\n// File: %s\n'%\
  66                            os.path.basename(f.name))
  67
  68         self.space_re = re.compile(r'\s+')
  69         self.lead_spc = re.compile(r'^(%feature\S+\s+\S+\s*?)"\s+(\S)')
  70         self.multi = 0
  71         self.ignores = ('inheritancegraph', 'param', 'listofallmembers',
  72                         'innerclass', 'name', 'declname', 'incdepgraph',
  73                         'invincdepgraph', 'programlisting', 'type',
  74                         'references', 'referencedby', 'location',
  75                         'collaborationgraph', 'reimplements',
  76                         'reimplementedby', 'derivedcompoundref',
  77                         'basecompoundref')
  78         #self.generics = []
  79
  80     def generate(self):
  81         """Parses the file set in the initialization.  The resulting
  82         data is stored in `self.pieces`.
  83
  84         """
  85         self.parse(self.xmldoc)
  86
  87     def parse(self, node):
  88         """Parse a given node.  This function in turn calls the
  89         `parse_<nodeType>` functions which handle the respective
  90         nodes.
  91
  92         """
  93         pm = getattr(self, "parse_%s"%node.__class__.__name__)
  94         pm(node)
  95
  96     def parse_Document(self, node):
  97         self.parse(node.documentElement)
  98
  99     def parse_Text(self, node):
 100         txt = node.data
 101         txt = txt.replace('\\', r'\\\\')
 102         txt = txt.replace('"', r'\"')
 103         # ignore pure whitespace
 104         m = self.space_re.match(txt)
 105         if m and len(m.group()) == len(txt):
 106             pass
 107         else:
 108             self.add_text(textwrap.fill(txt))
 109
 110     def parse_Element(self, node):
 111         """Parse an `ELEMENT_NODE`.  This calls specific
 112         `do_<tagName>` handers for different elements.  If no handler
 113         is available the `generic_parse` method is called.  All
 114         tagNames specified in `self.ignores` are simply ignored.
 115
 116         """
 117         name = node.tagName
 118         ignores = self.ignores
 119         if name in ignores:
 120             return
 121         attr = "do_%s" % name
 122         if hasattr(self, attr):
 123             handlerMethod = getattr(self, attr)
 124             handlerMethod(node)
 125         else:
 126             self.generic_parse(node)
 127             #if name not in self.generics: self.generics.append(name)
 128
 129     def add_text(self, value):
 130         """Adds text corresponding to `value` into `self.pieces`."""
 131         if type(value) in (list, tuple):
 132             self.pieces.extend(value)
 133         else:
 134             self.pieces.append(value)
 135
 136     def get_specific_nodes(self, node, names):
 137         """Given a node and a sequence of strings in `names`, return a
 138         dictionary containing the names as keys and child
 139         `ELEMENT_NODEs`, that have a `tagName` equal to the name.
 140
 141         """
 142         nodes = [(x.tagName, x) for x in node.childNodes \
 143                  if x.nodeType == x.ELEMENT_NODE and \
 144                  x.tagName in names]
 145         return dict(nodes)
 146
 147     def generic_parse(self, node, pad=0):
 148         """A Generic parser for arbitrary tags in a node.
 149
 150         Parameters:
 151
 152          - node:  A node in the DOM.
 153          - pad: `int` (default: 0)
 154
 155            If 0 the node data is not padded with newlines.  If 1 it
 156            appends a newline after parsing the childNodes.  If 2 it
 157            pads before and after the nodes are processed.  Defaults to
 158            0.
 159
 160         """
 161         npiece = 0
 162         if pad:
 163             npiece = len(self.pieces)
 164             if pad == 2:
 165                 self.add_text('\n')
 166         for n in node.childNodes:
 167             self.parse(n)
 168         if pad:
 169             if len(self.pieces) > npiece:
 170                 self.add_text('\n')
 171
 172     def space_parse(self, node):
 173         self.add_text(' ')
 174         self.generic_parse(node)
 175
 176     do_ref = space_parse
 177     do_emphasis = space_parse
 178     do_bold = space_parse
 179     do_computeroutput = space_parse
 180     do_formula = space_parse
 181
 182     def do_compoundname(self, node):
 183         self.add_text('\n\n')
 184         data = node.firstChild.data
 185         self.add_text('%%feature("docstring") %s "\n'%data)
 186
 187     def do_compounddef(self, node):
 188         kind = node.attributes['kind'].value
 189         if kind in ('class', 'struct'):
 190             prot = node.attributes['prot'].value
 191             if prot != 'public':
 192                 return
 193             names = ('compoundname', 'briefdescription',
 194                      'detaileddescription', 'includes')
 195             first = self.get_specific_nodes(node, names)
 196             for n in names:
 197                 if n in first:
 198                     self.parse(first[n])
 199             self.add_text(['";','\n'])
 200             for n in node.childNodes:
 201                 first_values = list(first.values())
 202                 if n not in first_values:
 203                     self.parse(n)
 204         elif kind in ('file', 'namespace'):
 205             nodes = node.getElementsByTagName('sectiondef')
 206             for n in nodes:
 207                 self.parse(n)
 208
 209     def do_includes(self, node):
 210         # Don't display C++ includes - Python programmers don't care
 211         #self.add_text('C++ includes: ')
 212         #self.generic_parse(node, pad=1)
 213         pass
 214
 215     def do_parameterlist(self, node):
 216         self.add_text(['\n', '\n', 'Parameters:', '\n'])
 217         self.generic_parse(node, pad=1)
 218
 219     def do_para(self, node):
 220         self.add_text('\n')
 221         self.generic_parse(node, pad=1)
 222
 223     def do_parametername(self, node):
 224         self.add_text('\n')
 225         self.generic_parse(node, pad=0)
 226         self.add_text(": ")
 227
 228     def do_parameterdefinition(self, node):
 229         self.generic_parse(node, pad=1)
 230
 231     def do_detaileddescription(self, node):
 232         self.generic_parse(node, pad=1)
 233
 234     def do_briefdescription(self, node):
 235         self.generic_parse(node, pad=1)
 236
 237     def do_memberdef(self, node):
 238         prot = node.attributes['prot'].value
 239         id = node.attributes['id'].value
 240         kind = node.attributes['kind'].value
 241         tmp = node.parentNode.parentNode.parentNode
 242         compdef = tmp.getElementsByTagName('compounddef')[0]
 243         cdef_kind = compdef.attributes['kind'].value
 244
 245         if prot == 'public':
 246             first = self.get_specific_nodes(node, ('briefdescription', 'definition', 'name'))
 247             name = first['name'].firstChild.data
 248             if name[:8] == 'operator': # Don't handle operators yet.
 249                 return
 250
 251             briefdesc = ''
 252             if 'briefdescription' in first:
 253                 briefdesc = first['briefdescription']
 254             defn = first['definition'].firstChild.data
 255
 256
 257             self.add_text('\n')
 258             self.add_text('%feature("docstring") ')
 259
 260             anc = node.parentNode.parentNode
 261             if cdef_kind in ('file', 'namespace'):
 262                 ns_node = anc.getElementsByTagName('innernamespace')
 263                 if not ns_node and cdef_kind == 'namespace':
 264                     ns_node = anc.getElementsByTagName('compoundname')
 265                 if ns_node:
 266                     ns = ns_node[0].firstChild.data
 267                     self.add_text(' %s::%s "\n'%(ns, name))
 268                 else:
 269                     self.add_text(' %s "\n'%(name))
 270             elif cdef_kind in ('class', 'struct'):
 271                 # Get the full function name.
 272                 anc_node = anc.getElementsByTagName('compoundname')
 273                 cname = anc_node[0].firstChild.data
 274                 self.add_text(' %s::%s "\n'%(cname, name))
 275             if briefdesc is not None:
 276                 self.parse(briefdesc)
 277             self.add_text(defn)
 278
 279             for n in node.childNodes:
 280                 first_values = list(first.values())
 281                 if n not in first_values:
 282                     self.parse(n)
 283             self.add_text(['";', '\n'])
 284
 285     def do_definition(self, node):
 286         data = node.firstChild.data
 287         self.add_text('%s "\n%s'%(data, data))
 288
 289     def do_sectiondef(self, node):
 290         kind = node.attributes['kind'].value
 291         if kind in ('public-func', 'func'):
 292             self.generic_parse(node)
 293
 294     def do_simplesect(self, node):
 295         kind = node.attributes['kind'].value
 296         if kind in ('date', 'rcs', 'version'):
 297             pass
 298         elif kind == 'warning':
 299             self.add_text(['\n', 'WARNING: '])
 300             self.generic_parse(node)
 301         elif kind == 'see':
 302             self.add_text('\n')
 303             self.add_text('See: ')
 304             self.generic_parse(node)
 305         else:
 306             self.generic_parse(node)
 307
 308     def do_argsstring(self, node):
 309         self.generic_parse(node, pad=1)
 310
 311     def do_member(self, node):
 312         kind = node.attributes['kind'].value
 313         refid = node.attributes['refid'].value
 314         if kind == 'function' and refid[:9] == 'namespace':
 315             self.generic_parse(node)
 316
 317     def do_doxygenindex(self, node):
 318         self.multi = 1
 319         comps = node.getElementsByTagName('compound')
 320         for c in comps:
 321             refid = c.attributes['refid'].value
 322             fname = refid + '.xml'
 323             if not os.path.exists(fname):
 324                 fname = os.path.join(self.my_dir,  fname)
 325             print("parsing file: %s" % fname)
 326             p = Doxy2SWIG(fname)
 327             p.generate()
 328             self.pieces.extend(self.clean_pieces(p.pieces))
 329
 330     def write(self, fname):
 331         o = my_open_write(fname)
 332         if self.multi:
 333             o.write("".join(self.pieces))
 334         else:
 335             o.write("".join(self.clean_pieces(self.pieces)))
 336         o.close()
 337
 338     def clean_pieces(self, pieces):
 339         """Cleans the list of strings given as `pieces`.  It replaces
 340         multiple newlines by a maximum of 2 and returns a new list.
 341         It also wraps the paragraphs nicely.
 342
 343         """
 344         ret = []
 345         count = 0
 346         for i in pieces:
 347             if i == '\n':
 348                 count = count + 1
 349             else:
 350                 if i == '";':
 351                     if count:
 352                         ret.append('\n')
 353                 elif count > 2:
 354                     ret.append('\n\n')
 355                 elif count:
 356                     ret.append('\n'*count)
 357                 count = 0
 358                 ret.append(i)
 359
 360         _data = "".join(ret)
 361         ret = []
 362         for i in _data.split('\n\n'):
 363             if i == 'Parameters:':
 364                 ret.extend(['Parameters:\n-----------', '\n\n'])
 365             elif i.find('// File:') > -1: # leave comments alone.
 366                 ret.extend([i, '\n'])
 367             else:
 368                 _tmp = textwrap.fill(i.strip(), break_long_words=False)
 369                 _tmp = self.lead_spc.sub(r'\1"\2', _tmp)
 370                 ret.extend([_tmp, '\n\n'])
 371         return ret
 372
 373
 374 def main(input, output):
 375     p = Doxy2SWIG(input)
 376     p.generate()
 377     p.write(output)
 378
 379
 380 if __name__ == '__main__':
 381     if len(sys.argv) != 3:
 382         print(__doc__)
 383         sys.exit(1)
 384     main(sys.argv[1], sys.argv[2])