Add <target> to one more testcase (see r8206).
[docutils.git] / sandbox / rst2graph / rst2gxl.py
blobf59e978bc02a79f84b14f6f0811e9dc041fc7c9c
1 #!/usr/bin/env python
3 # Copyright (C) 2010, 2013 Stefan Merten
5 # rst2graph.py is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published
7 # by the Free Software Foundation; either version 2 of the License,
8 # or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
18 # 02111-1307, USA.
20 """
21 Translates the links in a reStructuredText document to a graph. The graph can
22 be output in various formats such as GXL_ or dot_. This graph can then be
23 converted to a graphical representation of the link structure of the document.
25 .. _GXL: http://www.gupro.de/GXL
26 .. _dot: http://www.graphviz.org/content/dot-language
27 """
29 __docformat__ = 'reStructuredText'
31 try:
32 import locale
33 locale.setlocale(locale.LC_ALL, '')
34 except:
35 pass
37 import docutils
38 from docutils import frontend, writers, nodes, utils
39 from docutils.core import publish_cmdline, default_description
41 from xml.dom import minidom
43 import re
44 import sys
45 import os.path
47 try:
48 from pygraphviz import AGraph
49 pygraphvizAvail = True
50 except:
51 pygraphvizAvail = False
53 description = ('Generates GXL from standalone reStructuredText sources. '
54 + default_description)
56 # `gxl2gv` supports V1.0 only
57 GxlNamespace = "http://www.gupro.de/GXL/gxl-1.0.dtd"
58 GxlTagRoot = "gxl"
59 GxlTagGraph = "graph"
60 GxlTagNode = "node"
61 GxlTagEdge = "edge"
62 GxlAttrId = "id"
63 GxlTagAttr = "attr"
64 GxlAttrName = "name"
65 GxlTagAttrTagName = "name"
66 GxlTagAttrTagNameTag = "string"
67 GxlAttrFrom = "from"
68 GxlAttrTo = "to"
69 GxlAttrEdgemode = "edgemode"
70 GxlValEdgemode = "directed"
72 DuAttrSource = "source"
73 DuAttrIds = "ids"
74 DuAttrNames = "names"
75 DuAttrRefid = "refid"
76 DuAttrRefuri = "refuri"
77 DuAttrClasses = "classes"
78 DuAttrClassesValToc = "contents"
80 CmdRst2Gxl = "rst2gxl"
81 CmdRst2Dot = "rst2dot"
82 CmdRst2Gv = "rst2gv"
84 ##############################################################################
85 ##############################################################################
86 # Functions
88 def string2XMLName(s):
89 """Return an XML Name similar to the string given."""
90 s = re.sub(r"(?u)[^-.:\w]", "_", s)
91 return re.sub("^(?u)[^:\w]", "_", s)
93 ##############################################################################
94 ##############################################################################
96 class Writer(writers.Writer):
98 """Formats this writer supports."""
99 supported = ('gxl', 'gv', 'dot')
101 settings_spec = (
102 'Graph Writer Format Options',
103 'The following options determine the output format produced.',
105 ('Produce output in the dot language suitable for Graphviz. '
106 'Default when called as rst2gv or rst2dot.',
107 ['--dot', '--gv'],
108 {'action': 'store_true', 'validator': frontend.validate_boolean}),
109 ('Produce GXL output. '
110 'Default when called as rst2gxl.',
111 ['--gxl'],
112 {'action': 'store_true', 'validator': frontend.validate_boolean}),
114 'Graph Writer Global Options',
115 'The following options affect the way the graph is produced.',
117 ('Put only nodes with connections to the graph. '
118 'Default is to put all nodes to the graph.',
119 ['--connected-only'],
120 {'action': 'store_true', 'validator': frontend.validate_boolean}),
121 ('Create multiple edges between same node if they exist in the '
122 'original document. Default is to unify all edges between two nodes.',
123 ['--multiedge'],
124 {'action': 'store_true', 'validator': frontend.validate_boolean}),
125 ('Create a reverse dependency graph. Default is a forward dependency '
126 'graph.',
127 ['--reverse'],
128 {'action': 'store_true', 'validator': frontend.validate_boolean}),
129 ('Select a certain table and ignore the rest of the document. The '
130 'argument must be the name of the table as given in the document or '
131 'the number of the table counting from 1. '
132 'Default is to consider the whole document. May be given more than '
133 'once.',
134 ['--select-table'],
135 {'action': 'append'}),
136 ('Use the whole path of section titles up to a section as label for '
137 'a node. Default is to use only the section title.',
138 ['--title-path'],
139 {'action': 'store_true', 'validator': frontend.validate_boolean}),
141 'Graph Writer GXL Options',
142 'The following options are valid only for GXL format output.',
144 ('Generate XML with indents and newlines.',
145 ['--indents'],
146 {'action': 'store_true', 'validator': frontend.validate_boolean}),
150 settings_defaults = {'output_encoding_error_handler': 'xmlcharrefreplace',
151 'reverse': False,
152 'multiedge': False,
153 'select_table': [ ],
154 'indents': False,
155 'gxl': False,
156 'dot': False,
157 'connected_only': False,
158 'title_path': False,
161 config_section = 'graph writer'
162 config_section_dependencies = ('writers',)
164 """Final translated form of `document`."""
165 output = None
167 def translate(self):
168 settings = self.document.settings
169 if settings.gxl and settings.dot:
170 self.document.reporter.severe("Options --gxl and --dot/--gv are mutual exclusive")
171 elif not settings.gxl and not settings.dot:
172 if len(sys.argv):
173 cmd = os.path.basename(sys.argv[0]).lower()
174 if cmd.startswith(CmdRst2Gxl):
175 settings.gxl = True
176 elif cmd.startswith(CmdRst2Dot) or cmd.startswith(CmdRst2Gv):
177 settings.dot = True
178 if not settings.gxl and not settings.dot:
179 self.document.reporter.severe("One of --gxl and --dot/--gv must be given or implied")
180 if not settings.gxl and settings.indents:
181 self.document.reporter.severe("--indents may be given only with --gxl")
183 renderer = GraphRenderer.getRenderer(settings, self.document.reporter)
184 if not renderer:
185 self.document.reporter.severe("Internal error: No `GraphRenderer` found")
186 visitor = GraphTranslator(self.document, settings)
187 self.document.walkabout(visitor)
188 visitor.resolve()
189 self.output = renderer.render(visitor)
191 ##############################################################################
192 ##############################################################################
194 class GraphTranslator(nodes.GenericNodeVisitor):
196 """Name of the source file."""
197 sourceName = None
199 """The list of anchors found by traversing."""
200 anchors = [ ]
202 """The last anchor found."""
203 _lastAnchor = None
205 """The list of ``GEdge``\s found by traversing."""
206 references = [ ]
208 """Stack for being currently in a selected part."""
209 _inSelected = [ ]
211 """Counter for selecting a table by number."""
212 _tablesSeen = 0
214 """Selected tables."""
215 _selectedTables = None
217 """Use title path for labels."""
218 _titlePath = None
220 def __init__(self, document, settings):
221 nodes.GenericNodeVisitor.__init__(self, document)
222 self.sourceName = document[DuAttrSource]
223 self._selectedTables = settings.select_table
224 if self._selectedTables:
225 self._inSelected.append(False)
226 else:
227 self._inSelected.append(True)
228 self._titlePath = settings.title_path
230 def default_visit(self, node):
231 if self.isSelected(node, True):
232 self._inSelected.append(True)
233 elif self.unSelected(node):
234 self._inSelected.append(False)
235 if self._inSelected[-1]:
236 self.processNode(node)
238 def default_departure(self, node):
239 if self.isSelected(node, False) or self.unSelected(node):
240 self._inSelected.pop()
242 def isSelected(self, node, entering):
243 if (self._selectedTables
244 and isinstance(node, nodes.table)):
245 if entering:
246 self._tablesSeen += 1
247 visitor = FirstTitleGatherer(self.document)
248 node.walkabout(visitor)
249 title = visitor.text
250 for wantedTable in self._selectedTables:
251 try:
252 if int(wantedTable) == self._tablesSeen:
253 return True
254 except:
255 if wantedTable == title:
256 return True
257 return False
259 def unSelected(self, node):
260 # TOCs are never selected
261 return (isinstance(node, nodes.topic)
262 and DuAttrClassesValToc in node.get(DuAttrClasses, ( )))
264 def processNode(self, node):
265 if Anchor.isAnchor(node):
266 self._lastAnchor = anchor = Anchor(node, self.document,
267 self._titlePath)
268 self.anchors.append(anchor)
269 if Reference.isReference(node):
270 reference = Reference(node, self._lastAnchor)
271 self.references.append(reference)
273 def resolve(self):
274 """Resolve anything necessary after visiting the document."""
275 for reference in self.references:
276 reference.resolve(self.anchors)
278 ##############################################################################
280 class Anchor(object):
281 """An anchor in the source."""
283 """The source node."""
284 _node = None
286 """The document where the node is in."""
287 _document = None
289 """The name of the node."""
290 _name = None
292 """The id of the node."""
293 _id = None
295 """Use title path for the name."""
296 _titlePath = None
298 def __init__(self, node, document, titlePath):
299 self._node = node
300 self._document = document
301 self._titlePath = titlePath
303 def name(self):
304 """Determine and return the user readable name of the anchor."""
305 if self._name is None:
306 root = self._node
307 if isinstance(self._node, nodes.Structural):
308 if self._titlePath:
309 visitor = TitlePathGatherer(self._document, self._node)
310 root = self._document
311 else:
312 visitor = FirstTitleGatherer(self._document)
313 else:
314 visitor = TextGatherer(self._document)
315 root.walkabout(visitor)
316 self._name = visitor.text.replace("\n", "\\n")
317 return self._name
319 def id(self):
320 """Determine and return the canoncical id of the anchor."""
321 if self._id is None:
322 self._id = self._node[DuAttrIds][0]
323 return self._id
325 def ids(self):
326 """Return all ids of the anchor."""
328 return self._node[DuAttrIds]
330 @staticmethod
331 def isAnchor(node):
332 """``True`` if the node can be an ``Anchor``."""
333 # TODO What is considered an anchor needs to be subject to an option
334 return bool((isinstance(node, nodes.target)
335 or isinstance(node, nodes.Structural))
336 and node[DuAttrIds]
337 and not node.get(DuAttrRefuri, None))
339 ##############################################################################
341 class Reference(object):
342 """A reference to an anchor in the source."""
344 """The source node."""
345 _node = None
347 """The last anchor seen before this reference."""
348 fromAnchor = None
350 """The anchor this reference points to."""
351 toAnchor = None
353 def __init__(self, node, fromAnchor):
354 self._node = node
355 self.fromAnchor = fromAnchor
357 def resolve(self, anchors):
358 """Resolve this reference against the anchors given."""
359 for anchor in anchors:
360 if self._node[DuAttrRefid] in anchor.ids():
361 self.toAnchor = anchor
362 break
364 @staticmethod
365 def isReference(node):
366 """``True`` if the node can be a ``Reference``."""
367 return bool(isinstance(node, nodes.Referential)
368 and node.get(DuAttrRefid, None))
370 ##############################################################################
371 ##############################################################################
373 class TextGatherer(nodes.SparseNodeVisitor):
374 """A visitor gathering text. This should be subclassed for more specialized
375 gatherers."""
377 """Gathered text. May contain line feeds for several lines."""
378 text = ""
380 def visit_generated(self, node):
381 raise nodes.SkipNode()
383 def visit_Text(self, node):
384 self.text += node.astext()
386 ##############################################################################
388 class FirstTitleGatherer(TextGatherer):
389 """A visitor gathering text in first title."""
391 _gather = False
393 def visit_title(self, node):
394 self._gather = True
396 def depart_title(self, node):
397 raise nodes.StopTraversal()
399 def visit_Text(self, node):
400 if self._gather:
401 self.text += node.astext()
403 ##############################################################################
405 class TitlePathGatherer(TextGatherer):
406 """A visitor gathering title text for all sections leading up to a certain
407 one."""
409 """Current title path."""
410 _titles = None
412 """The structural node we are looking for."""
413 _structuralNode = None
415 def __init__(self, document, structuralNode):
416 nodes.SparseNodeVisitor.__init__(self, document)
417 self._titles = [ ]
418 if not isinstance(structuralNode, nodes.Structural):
419 raise TypeError("Node looked for in `TitlePathGatherer` must be a structural node")
420 self._structuralNode = structuralNode
422 def visit_section(self, node):
423 visitor = FirstTitleGatherer(self.document)
424 node.walkabout(visitor)
425 self._titles.append(visitor.text)
427 def depart_section(self, node):
428 if node == self._structuralNode:
429 self.text = "\n".join(self._titles)
430 raise nodes.StopTraversal()
431 else:
432 self._titles.pop()
434 ##############################################################################
435 ##############################################################################
437 class GraphRenderer(object):
438 """Abstract base class for graph renderers."""
440 """Command line settings."""
441 _settings = None
443 """Reverse the direction of the edges."""
444 _doReverse = None
446 """Unify multiple edges to a single one."""
447 _doUnify = None
449 """Render only connected nodes."""
450 _connectedOnly = None
452 """The GraphTranslator currently rendered."""
453 _visitor = None
455 def __init__(self, settings, reporter):
456 self._settings = settings
457 self.reporter = reporter
458 self._doReverse = self._settings.reverse
459 self._doUnify = not self._settings.multiedge
460 self._connectedOnly = self._settings.connected_only
462 @staticmethod
463 def getRenderer(settings, reporter):
464 """Factory method: Return the correct renderer according to the
465 settings."""
466 if settings.gxl:
467 return GxlRenderer(settings, reporter)
468 elif settings.dot:
469 return DotRenderer(settings, reporter)
470 return None
472 def render(self, visitor):
473 """Translate nodes and edges to a GXL DOM and return it as a XML
474 string."""
475 self._visitor = visitor
476 self.prepare()
478 references = self.validReferences(self._visitor.references)
479 for anchor in self.validAnchors(self._visitor.anchors, references):
480 self.renderAnchor(anchor)
481 for reference in references:
482 self.renderReference(reference)
484 r = self.finish()
485 self._visitor = None
486 return r
488 def prepare(self):
489 """Prepare the rendering."""
490 pass
492 def finish(self):
493 """Finish the rendering and return the resulting string."""
494 raise NotImplementedError("Each subclass of `GraphRenderer` must implement `finish()`")
496 def renderAnchor(self, anchor):
497 """Render an anchor."""
498 raise NotImplementedError("Each subclass of `GraphRenderer` must implement `renderAnchor()`")
500 def renderReference(self, reference):
501 """Render a reference."""
502 raise NotImplementedError("Each subclass of `GraphRenderer` must implement `renderReference()`")
504 def validReferences(self, references):
505 """Checks references for valid ones and returns these."""
506 valids = [ ]
507 for reference in references:
508 add = reference.fromAnchor and reference.toAnchor
509 if add and self._doUnify:
510 for unique in valids:
511 if (reference.fromAnchor == unique.fromAnchor and
512 reference.toAnchor == unique.toAnchor):
513 add = False
514 break
515 if add:
516 valids.append(reference)
517 return valids
519 def validAnchors(self, anchors, references):
520 """Checks anchors for valid ones and returns these."""
521 if not self._connectedOnly:
522 return anchors
523 usedAnchors = ([ reference.fromAnchor
524 for reference in references ]
525 + [ reference.toAnchor
526 for reference in references ])
527 return [ anchor
528 for anchor in anchors
529 if anchor in usedAnchors ]
531 ##############################################################################
533 class GxlRenderer(GraphRenderer):
534 """Renderer fox GXL."""
536 """Indentation to use for output."""
537 _indent = ''
539 """Newline to use for output."""
540 _newline = ''
542 """The DOM document where the result is build."""
543 _doc = None
545 """The top node in the DOM."""
546 _graph = None
548 def __init__(self, settings, reporter):
549 GraphRenderer.__init__(self, settings, reporter)
550 if self._settings.indents:
551 self._indent = ' '
552 self._newline = '\n'
554 def prepare(self):
555 impl = minidom.getDOMImplementation()
556 doctype = impl.createDocumentType(GxlTagRoot, None, GxlNamespace)
557 self._doc = impl.createDocument(None, GxlTagRoot, doctype)
558 self._graph = self._doc.createElement(GxlTagGraph)
559 self._doc.documentElement.appendChild(self._graph)
560 self._graph.setAttribute(GxlAttrId, string2XMLName(self._visitor.sourceName))
561 self._graph.setAttribute(GxlAttrEdgemode, GxlValEdgemode)
563 def finish(self):
564 r = self._doc.toprettyxml(self._indent, self._newline)
565 self._graph = None
566 self._doc.unlink()
567 self._doc = None
568 return r
570 def renderAnchor(self, anchor):
571 eNode = self._doc.createElement(GxlTagNode)
572 self._graph.appendChild(eNode)
573 eNode.setAttribute(GxlAttrId, anchor.id())
575 eAttr = self._doc.createElement(GxlTagAttr)
576 eNode.appendChild(eAttr)
577 eAttr.setAttribute(GxlAttrName, GxlTagAttrTagName)
579 eContent = self._doc.createElement(GxlTagAttrTagNameTag)
580 eAttr.appendChild(eContent)
581 eContent.appendChild(self._doc.createTextNode(anchor.name()))
583 def renderReference(self, reference):
584 if reference.fromAnchor is None:
585 # No anchor to start edge from
586 # TODO Should result in a warning
587 return
589 eEdge = self._doc.createElement(GxlTagEdge)
590 self._graph.appendChild(eEdge)
591 fromAttr = GxlAttrFrom
592 toAttr = GxlAttrTo
593 if self._doReverse:
594 (fromAttr, toAttr) = (toAttr, fromAttr)
595 eEdge.setAttribute(toAttr, reference.toAnchor.id())
596 # TODO There should be several ways to identify the "from" node
597 eEdge.setAttribute(fromAttr, reference.fromAnchor.id())
599 ##############################################################################
601 class DotRenderer(GraphRenderer):
602 """Renderer fox dot."""
604 """The AGraph used for rendering."""
605 _graph = None
607 def __init__(self, settings, reporter):
608 GraphRenderer.__init__(self, settings, reporter)
610 if not pygraphvizAvail:
611 self.reporter.severe("Can not render dot format because module `pygraphviz` cannot be loaded")
613 def prepare(self):
614 self._graph = AGraph(strict=False, directed=True,
615 name=self._visitor.sourceName)
617 def finish(self):
618 # TODO The ordering of nodes seems to be rather random in the output;
619 # however, the node ordering is used for the layout of the graph
620 # at least by `dot` and by `neato`; there should be a way to
621 # determine the ordering
622 r = self._graph.string()
623 self._graph = None
624 return r
626 def renderAnchor(self, anchor):
627 self._graph.add_node(anchor.id(), label=anchor.name())
629 def renderReference(self, reference):
630 if reference.fromAnchor is None:
631 # No anchor to start edge from
632 # TODO Should result in a warning
633 return
635 fromId = reference.fromAnchor.id()
636 toId = reference.toAnchor.id()
637 if self._doReverse:
638 (fromId, toId) = (toId, fromId)
639 self._graph.add_edge(fromId, toId)
641 ##############################################################################
642 ##############################################################################
643 # Main
645 publish_cmdline(writer=Writer(), description=description)