New default option `--compare-sections-by-id` compares sections by
[docutils/kirr.git] / sandbox / rst2gxl / rst2gxl.py
blob6fd1ab6889447e3c7c5b7250de8a2e8d1b96fc50
1 #!/usr/bin/env python
3 # Copyright (C) 2010 Stefan Merten
5 # rst2gxl.py is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published
7 # by the Free Software Foundation; either version 2 of the License,
8 # or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
18 # 02111-1307, USA.
20 """
21 Translates a reStructuredText document to GXL_. This can then be
22 transformed to graphs for instance by dot_.
24 .. _GXL: http://www.gupro.de/GXL
25 .. _dot: http://graphviz.org/
26 """
28 __docformat__ = 'reStructuredText'
# Best-effort locale initialisation: adopt the user's default locale for all
# categories, but never let a failure here prevent the tool from running.
try:
    import locale
    locale.setlocale(locale.LC_ALL, '')
except Exception:
    # Deliberately ignored (e.g. unsupported locale settings).  Catching
    # ``Exception`` instead of using a bare ``except`` keeps
    # KeyboardInterrupt and SystemExit propagating.
    pass
36 import docutils
37 from docutils import frontend, writers, nodes
38 from docutils.core import publish_cmdline, default_description
40 from xml.dom import minidom
# Description handed to ``publish_cmdline``: a one-sentence summary of this
# front end followed by the generic docutils description text.
description = ''.join((
    'Generates GXL from standalone reStructuredText sources. ',
    default_description,
))
# --- GXL vocabulary ---------------------------------------------------------

# System identifier used for the document type of the generated XML
GxlNamespace = "http://www.gupro.de/GXL/gxl-1.1.dtd"

# Element names
GxlTagRoot = "gxl"
GxlTagGraph = "graph"
GxlTagNode = "node"
GxlTagEdge = "edge"
GxlTagAttr = "attr"

# Attribute names
GxlAttrId = "id"
GxlAttrName = "name"
GxlAttrFrom = "from"
GxlAttrTo = "to"
GxlAttrEdgemode = "edgemode"

# Value of the ``name`` attribute of the ``attr`` element carrying a node's
# name, and the element wrapping the text of that name
GxlTagAttrTagName = "name"
GxlTagAttrTagNameTag = "string"

# Value written to the ``edgemode`` attribute of the graph
GxlValEdgemode = "directed"

# --- Docutils node attributes -----------------------------------------------

DuAttrSource = "source"
DuAttrIds = "ids"
DuAttrNames = "names"
DuAttrRefid = "refid"
DuAttrRefuri = "refuri"
DuAttrClasses = "classes"

# Class value identifying a table-of-contents topic
DuAttrClassesValToc = "contents"
class Writer(writers.Writer):
    """Docutils writer producing a GXL graph from a document.

    Anchors found in the document become graph nodes, references between
    them become edges.
    """

    supported = ('gxl',)
    """Formats this writer supports."""

    # (title, description, option specifications) as expected by docutils
    settings_spec = (
        'GXL Writer Options',
        None,
        (
            ('Generate XML with indents and newlines. Use this for human '
             'reading only.',
             ['--indents'],
             {'action': 'store_true',
              'validator': frontend.validate_boolean}),
            ('Create a reverse dependency graph. Default is a forward '
             'dependency graph.',
             ['--reverse'],
             {'action': 'store_true',
              'validator': frontend.validate_boolean}),
            ('Create multiple edges between same node if they exist in the '
             'original document. Default is to unify all edges between two '
             'nodes.',
             ['--multiedge'],
             {'action': 'store_true',
              'validator': frontend.validate_boolean}),
            ('Select a certain table and ignore the rest of the document. '
             'The argument must be the name of the table as given in the '
             'document or  the number of the table counting from 1. '
             'Default is to consider the whole document. May be given more '
             'than once.',
             ['--select-table'],
             {'action': 'append'}),
            # TODO The encoding must be specified somehow
        ),
    )

    settings_defaults = {
        'output_encoding_error_handler': 'xmlcharrefreplace',
        'reverse': False,
        'multiedge': False,
        'select_table': [ ],
    }

    config_section = 'gxl writer'
    config_section_dependencies = ('writers',)

    output = None
    """Final translated form of `document`."""

    def translate(self):
        """Walk the document and store the serialized GXL in `output`."""
        options = self.document.settings

        # Pretty printing is only applied when ``--indents`` was given
        if options.indents:
            indentText, lineEnd = ' ', '\n'
        else:
            indentText, lineEnd = '', ''

        collector = GXLTranslator(self.document)
        self.document.walkabout(collector)

        dom = self.nodes2Glx(self.document, collector.anchors,
                             collector.references, options.reverse,
                             not options.multiedge)
        self.output = dom.toprettyxml(indentText, lineEnd)
        dom.unlink()

    def nodes2Glx(self, document, anchors, references, doReverse, doUnify):
        """Translate nodes and edges to a GXL DOM

        `anchors` are rendered as nodes.  `references` are first resolved
        against `anchors`; only those accepted by their ``gatherValids``
        method (unified if `doUnify` is set) are rendered as edges, reversed
        if `doReverse` is set.  Returns the new DOM document.
        """
        domImpl = minidom.getDOMImplementation()
        docType = domImpl.createDocumentType(GxlTagRoot, None, GxlNamespace)
        dom = domImpl.createDocument(None, GxlTagRoot, docType)

        # A single directed graph named after the document source
        graphElement = dom.createElement(GxlTagGraph)
        graphElement.setAttribute(GxlAttrId, document[DuAttrSource])
        graphElement.setAttribute(GxlAttrEdgemode, GxlValEdgemode)
        dom.documentElement.appendChild(graphElement)

        for node in anchors:
            node.renderGlx(dom, graphElement)

        for edge in references:
            edge.resolve(anchors)

        accepted = [ ]
        for edge in references:
            edge.gatherValids(accepted, doUnify)

        for edge in accepted:
            edge.renderGlx(dom, graphElement, doReverse)

        return dom
class GXLTranslator(nodes.GenericNodeVisitor):
    """Visitor collecting the anchors and references of a document.

    After the walk, `anchors` holds an ``Anchor`` for every anchor node
    found and `references` holds a ``Reference`` for every reference node
    found.  If the ``select_table`` setting is given, only nodes inside the
    selected tables are considered.  Tables of contents are never
    considered.
    """

    def __init__(self, document):
        nodes.GenericNodeVisitor.__init__(self, document)

        # All state is per instance.  Keeping the lists as class attributes
        # would share them between every translator created in the same
        # process.

        # The list of anchors found by traversing
        self.anchors = [ ]
        # The last anchor found
        self.lastAnchor = None
        # The list of references found by traversing
        self.references = [ ]
        # Counter for selecting a table by number
        self.tablesSeen = 0
        # Stack for being currently in a selected part.  Without a table
        # selection the whole document is selected.
        self.inSelected = [ not document.settings.select_table ]
        # Nodes which pushed an entry on `inSelected`, in the same order
        self._selectionOwners = [ ]

    def default_visit(self, node):
        """Update the selection state for `node` and process it if it is
        inside a selected part."""
        if self.isSelected(node, True):
            self._pushSelection(node, True)
        elif self.unSelected(node):
            self._pushSelection(node, False)
        if self.inSelected[-1]:
            self.processNode(node)

    def default_departure(self, node):
        """Undo the selection state change made when entering `node`."""
        # The decision taken on entry is remembered instead of being
        # re-evaluated: `tablesSeen` may have advanced in between (nested
        # tables), which would leave the stack unbalanced.
        if self._selectionOwners and self._selectionOwners[-1] is node:
            self._selectionOwners.pop()
            self.inSelected.pop()

    def _pushSelection(self, node, selected):
        """Record that `node` switches the selection state to `selected`."""
        self._selectionOwners.append(node)
        self.inSelected.append(selected)

    def isSelected(self, node, entering):
        """``True`` if `node` is a table chosen by the ``select_table``
        setting.

        A wanted table given as a number is compared to the number of
        tables seen so far, anything else is compared to the first title of
        the table.  If `entering` is set, the table is counted.
        """
        wanted = self.document.settings.select_table
        if not (wanted and isinstance(node, nodes.table)):
            return False

        if entering:
            self.tablesSeen += 1
        visitor = FirstTitleGatherer(self.document)
        node.walkabout(visitor)
        title = visitor.text

        for wantedTable in wanted:
            try:
                number = int(wantedTable)
            except (TypeError, ValueError):
                # Not a number, so it must be the name of a table
                if wantedTable == title:
                    return True
            else:
                if number == self.tablesSeen:
                    return True
        return False

    def unSelected(self, node):
        """``True`` if `node` is a table of contents."""
        # TOCs are never selected
        return (isinstance(node, nodes.topic)
                and DuAttrClassesValToc in node.get(DuAttrClasses, ( )))

    def processNode(self, node):
        """Record `node` as an anchor and / or a reference if it is one.

        A reference starts from the last anchor seen so far, which may be
        the anchor created from the very same node.
        """
        if Anchor.isAnchor(node):
            self.lastAnchor = anchor = Anchor(node, self.document)
            self.anchors.append(anchor)
        if Reference.isReference(node):
            reference = Reference(node, self.lastAnchor)
            self.references.append(reference)
class Anchor(object):
    """An anchor in the source"""

    # The source node
    node = None

    # The name of the node; computed on first use by `name()`
    _name = None

    def __init__(self, node, document):
        self.node = node
        self.document = document

    def renderGlx(self, doc, graph):
        """Append a GXL node for this anchor to `graph`.

        The node gets the first id of the source node as its id and carries
        the anchor's name in a nested attribute element.
        """
        nodeElement = doc.createElement(GxlTagNode)
        graph.appendChild(nodeElement)
        nodeElement.setAttribute(GxlAttrId, self.ids()[0])

        attrElement = doc.createElement(GxlTagAttr)
        nodeElement.appendChild(attrElement)
        attrElement.setAttribute(GxlAttrName, GxlTagAttrTagName)

        valueElement = doc.createElement(GxlTagAttrTagNameTag)
        attrElement.appendChild(valueElement)
        valueElement.appendChild(doc.createTextNode(self.name()))

    def name(self):
        """Return the name of this anchor.

        For structural nodes this is the text of the first title, for all
        other nodes the complete text.  The result is cached.
        """
        if self._name is not None:
            return self._name

        if isinstance(self.node, nodes.Structural):
            gatherer = FirstTitleGatherer(self.document)
        else:
            gatherer = TextGatherer(self.document)
        self.node.walkabout(gatherer)
        self._name = gatherer.text
        return self._name

    def ids(self):
        """Return the ids of the source node."""
        return self.node[DuAttrIds]

    @staticmethod
    def isAnchor(node):
        """``True`` if the node can be an ``Anchor``"""
        # TODO What is considered an anchor needs to be subject to an option
        if not isinstance(node, (nodes.target, nodes.Structural)):
            return False
        if not node[DuAttrIds]:
            return False
        # Nodes carrying a URI are not anchors
        return not node.get(DuAttrRefuri, None)
class Reference(object):
    """A reference in the source"""

    # The source node
    node = None

    # The last anchor seen before this reference
    fromAnchor = None

    # The anchor this points to; set by `resolve()`
    toAnchor = None

    def __init__(self, node, fromAnchor):
        self.node = node
        self.fromAnchor = fromAnchor

    def renderGlx(self, doc, graph, doReverse):
        """Append a GXL edge for this reference to `graph`.

        The edge runs from `fromAnchor` to `toAnchor`, or the other way
        round if `doReverse` is set.  Nothing is rendered if either end is
        missing.
        """
        if self.fromAnchor is None or self.toAnchor is None:
            # No anchor to start the edge from or to end it at
            # TODO Should result in a warning
            return

        eEdge = doc.createElement(GxlTagEdge)
        graph.appendChild(eEdge)
        fromAttr, toAttr = GxlAttrFrom, GxlAttrTo
        if doReverse:
            fromAttr, toAttr = toAttr, fromAttr
        # The ends of an edge must refer to the ids of nodes, so use the
        # same id ``Anchor.renderGlx`` gives to the node and not its name.
        eEdge.setAttribute(toAttr, self.toAnchor.ids()[0])
        # TODO There should be several ways to identify the "from" node
        eEdge.setAttribute(fromAttr, self.fromAnchor.ids()[0])

    def resolve(self, anchors):
        """Resolve this reference against the anchors given.

        `toAnchor` is set to the first anchor having the id referenced by
        the source node.  It is left unchanged if there is no such anchor.
        """
        for anchor in anchors:
            if self.node[DuAttrRefid] in anchor.ids():
                self.toAnchor = anchor
                break

    def gatherValids(self, valids, doUnify):
        """Append this reference to the list `valids` if it is valid.

        A reference is valid if it has both a start and an end anchor.  If
        `doUnify` is set, it is not added if `valids` already contains a
        reference with the same start and end anchor.  Returns nothing.
        """
        if not self.fromAnchor or not self.toAnchor:
            return
        if doUnify and any(self.fromAnchor == unique.fromAnchor
                           and self.toAnchor == unique.toAnchor
                           for unique in valids):
            return
        valids.append(self)

    @staticmethod
    def isReference(node):
        """``True`` if the node can be a ``Reference``"""
        return bool(isinstance(node, nodes.Referential)
                    and node.get(DuAttrRefid, None))
class TextGatherer(nodes.SparseNodeVisitor):
    """Visitor concatenating the text found below a node.

    Text inside ``generated`` elements is left out.
    """

    # Text gathered so far
    text = ""

    # Whether text nodes currently contribute to `text`
    gather = True

    def visit_generated(self, node):
        # Stop gathering while inside a ``generated`` element
        self.gather = False

    def depart_generated(self, node):
        self.gather = True

    def visit_Text(self, node):
        if not self.gather:
            return
        self.text = self.text + node.astext()
class FirstTitleGatherer(nodes.SparseNodeVisitor):
    """Visitor collecting the text of the first title below a node.

    Text inside ``generated`` elements is left out.
    """

    # Text gathered so far
    text = ""

    # ``True`` while inside the first title
    gather = False
    # ``True`` once a title has been left
    found = False
    # ``True`` while inside a ``generated`` element
    skip = False

    def visit_title(self, node):
        # Only the first title is gathered
        self.gather = not self.found

    def depart_title(self, node):
        self.found = True
        self.gather = False

    def visit_generated(self, node):
        self.skip = True

    def depart_generated(self, node):
        self.skip = False

    def visit_Text(self, node):
        if self.skip or not self.gather:
            return
        self.text = self.text + node.astext()
# Script entry point: run the docutils command line front end with the GXL
# writer defined in this file and the description built near the top.
# NOTE(review): no ``__main__`` guard is visible, so this presumably also
# runs when the file is imported as a module - confirm this is intended.
publish_cmdline(writer=Writer(), description=description)