3 # Copyright (C) 2010, 2013 Stefan Merten
5 # rst2graph.py is free software; you can redistribute it and/or modify
6 # it under the terms of the GNU General Public License as published
7 # by the Free Software Foundation; either version 2 of the License,
8 # or (at your option) any later version.
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty of
12 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 # General Public License for more details.
15 # You should have received a copy of the GNU General Public License
16 # along with this program; if not, write to the Free Software
17 # Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
21 Translates the links in a reStructuredText document to a graph. The graph can
22 be output in various formats such as GXL_ or dot_. This graph can then be
23 converted to a graphical representation of the link structure of the document.
25 .. _GXL: http://www.gupro.de/GXL
26 .. _dot: http://www.graphviz.org/content/dot-language
29 __docformat__
= 'reStructuredText'
33 locale
.setlocale(locale
.LC_ALL
, '')
38 from docutils
import frontend
, writers
, nodes
, utils
39 from docutils
.core
import publish_cmdline
, default_description
41 from xml
.dom
import minidom
48 from pygraphviz
import AGraph
49 pygraphvizAvail
= True
51 pygraphvizAvail
= False
53 description
= ('Generates GXL from standalone reStructuredText sources. '
54 + default_description
)
56 # `gxl2gv` supports V1.0 only
57 GxlNamespace
= "http://www.gupro.de/GXL/gxl-1.0.dtd"
65 GxlTagAttrTagName
= "name"
66 GxlTagAttrTagNameTag
= "string"
69 GxlAttrEdgemode
= "edgemode"
70 GxlValEdgemode
= "directed"
72 DuAttrSource
= "source"
76 DuAttrRefuri
= "refuri"
77 DuAttrClasses
= "classes"
78 DuAttrClassesValToc
= "contents"
80 CmdRst2Gxl
= "rst2gxl"
81 CmdRst2Dot
= "rst2dot"
84 ##############################################################################
85 ##############################################################################
88 def string2XMLName(s
):
89 """Return an XML Name similar to the string given."""
90 s
= re
.sub(r
"(?u)[^-.:\w]", "_", s
)
91 return re
.sub("^(?u)[^:\w]", "_", s
)
93 ##############################################################################
94 ##############################################################################
96 class Writer(writers
.Writer
):
98 """Formats this writer supports."""
99 supported
= ('gxl', 'gv', 'dot')
102 'Graph Writer Format Options',
103 'The following options determine the output format produced.',
105 ('Produce output in the dot language suitable for Graphviz. '
106 'Default when called as rst2gv or rst2dot.',
108 {'action': 'store_true', 'validator': frontend
.validate_boolean
}),
109 ('Produce GXL output. '
110 'Default when called as rst2gxl.',
112 {'action': 'store_true', 'validator': frontend
.validate_boolean
}),
114 'Graph Writer Global Options',
115 'The following options affect the way the graph is produced.',
117 ('Put only nodes with connections to the graph. '
118 'Default is to put all nodes to the graph.',
119 ['--connected-only'],
120 {'action': 'store_true', 'validator': frontend
.validate_boolean
}),
121 ('Create multiple edges between same node if they exist in the '
122 'original document. Default is to unify all edges between two nodes.',
124 {'action': 'store_true', 'validator': frontend
.validate_boolean
}),
125 ('Create a reverse dependency graph. Default is a forward dependency '
128 {'action': 'store_true', 'validator': frontend
.validate_boolean
}),
129 ('Select a certain table and ignore the rest of the document. The '
130 'argument must be the name of the table as given in the document or '
131 'the number of the table counting from 1. '
132 'Default is to consider the whole document. May be given more than '
135 {'action': 'append'}),
136 ('Use the whole path of section titles up to a section as label for '
137 'a node. Default is to use only the section title.',
139 {'action': 'store_true', 'validator': frontend
.validate_boolean
}),
141 'Graph Writer GXL Options',
142 'The following options are valid only for GXL format output.',
144 ('Generate XML with indents and newlines.',
146 {'action': 'store_true', 'validator': frontend
.validate_boolean
}),
150 settings_defaults
= {'output_encoding_error_handler': 'xmlcharrefreplace',
157 'connected_only': False,
161 config_section
= 'graph writer'
162 config_section_dependencies
= ('writers',)
164 """Final translated form of `document`."""
168 settings
= self
.document
.settings
169 if settings
.gxl
and settings
.dot
:
170 self
.document
.reporter
.severe("Options --gxl and --dot/--gv are mutual exclusive")
171 elif not settings
.gxl
and not settings
.dot
:
173 cmd
= os
.path
.basename(sys
.argv
[0]).lower()
174 if cmd
.startswith(CmdRst2Gxl
):
176 elif cmd
.startswith(CmdRst2Dot
) or cmd
.startswith(CmdRst2Gv
):
178 if not settings
.gxl
and not settings
.dot
:
179 self
.document
.reporter
.severe("One of --gxl and --dot/--gv must be given or implied")
180 if not settings
.gxl
and settings
.indents
:
181 self
.document
.reporter
.severe("--indents may be given only with --gxl")
183 renderer
= GraphRenderer
.getRenderer(settings
, self
.document
.reporter
)
185 self
.document
.reporter
.severe("Internal error: No `GraphRenderer` found")
186 visitor
= GraphTranslator(self
.document
, settings
)
187 self
.document
.walkabout(visitor
)
189 self
.output
= renderer
.render(visitor
)
191 ##############################################################################
192 ##############################################################################
194 class GraphTranslator(nodes
.GenericNodeVisitor
):
196 """Name of the source file."""
199 """The list of anchors found by traversing."""
202 """The last anchor found."""
205 """The list of ``GEdge``\s found by traversing."""
208 """Stack for being currently in a selected part."""
211 """Counter for selecting a table by number."""
214 """Selected tables."""
215 _selectedTables
= None
217 """Use title path for labels."""
220 def __init__(self
, document
, settings
):
221 nodes
.GenericNodeVisitor
.__init
__(self
, document
)
222 self
.sourceName
= document
[DuAttrSource
]
223 self
._selectedTables
= settings
.select_table
224 if self
._selectedTables
:
225 self
._inSelected
.append(False)
227 self
._inSelected
.append(True)
228 self
._titlePath
= settings
.title_path
230 def default_visit(self
, node
):
231 if self
.isSelected(node
, True):
232 self
._inSelected
.append(True)
233 elif self
.unSelected(node
):
234 self
._inSelected
.append(False)
235 if self
._inSelected
[-1]:
236 self
.processNode(node
)
238 def default_departure(self
, node
):
239 if self
.isSelected(node
, False) or self
.unSelected(node
):
240 self
._inSelected
.pop()
242 def isSelected(self
, node
, entering
):
243 if (self
._selectedTables
244 and isinstance(node
, nodes
.table
)):
246 self
._tablesSeen
+= 1
247 visitor
= FirstTitleGatherer(self
.document
)
248 node
.walkabout(visitor
)
250 for wantedTable
in self
._selectedTables
:
252 if int(wantedTable
) == self
._tablesSeen
:
255 if wantedTable
== title
:
259 def unSelected(self
, node
):
260 # TOCs are never selected
261 return (isinstance(node
, nodes
.topic
)
262 and DuAttrClassesValToc
in node
.get(DuAttrClasses
, ( )))
264 def processNode(self
, node
):
265 if Anchor
.isAnchor(node
):
266 self
._lastAnchor
= anchor
= Anchor(node
, self
.document
,
268 self
.anchors
.append(anchor
)
269 if Reference
.isReference(node
):
270 reference
= Reference(node
, self
._lastAnchor
)
271 self
.references
.append(reference
)
274 """Resolve anything necessary after visiting the document."""
275 for reference
in self
.references
:
276 reference
.resolve(self
.anchors
)
278 ##############################################################################
280 class Anchor(object):
281 """An anchor in the source."""
283 """The source node."""
286 """The document where the node is in."""
289 """The name of the node."""
292 """The id of the node."""
295 """Use title path for the name."""
298 def __init__(self
, node
, document
, titlePath
):
300 self
._document
= document
301 self
._titlePath
= titlePath
304 """Determine and return the user readable name of the anchor."""
305 if self
._name
is None:
307 if isinstance(self
._node
, nodes
.Structural
):
309 visitor
= TitlePathGatherer(self
._document
, self
._node
)
310 root
= self
._document
312 visitor
= FirstTitleGatherer(self
._document
)
314 visitor
= TextGatherer(self
._document
)
315 root
.walkabout(visitor
)
316 self
._name
= visitor
.text
.replace("\n", "\\n")
320 """Determine and return the canoncical id of the anchor."""
322 self
._id
= self
._node
[DuAttrIds
][0]
326 """Return all ids of the anchor."""
328 return self
._node
[DuAttrIds
]
332 """``True`` if the node can be an ``Anchor``."""
333 # TODO What is considered an anchor needs to be subject to an option
334 return bool((isinstance(node
, nodes
.target
)
335 or isinstance(node
, nodes
.Structural
))
337 and not node
.get(DuAttrRefuri
, None))
339 ##############################################################################
341 class Reference(object):
342 """A reference to an anchor in the source."""
344 """The source node."""
347 """The last anchor seen before this reference."""
350 """The anchor this reference points to."""
353 def __init__(self
, node
, fromAnchor
):
355 self
.fromAnchor
= fromAnchor
357 def resolve(self
, anchors
):
358 """Resolve this reference against the anchors given."""
359 for anchor
in anchors
:
360 if self
._node
[DuAttrRefid
] in anchor
.ids():
361 self
.toAnchor
= anchor
365 def isReference(node
):
366 """``True`` if the node can be a ``Reference``."""
367 return bool(isinstance(node
, nodes
.Referential
)
368 and node
.get(DuAttrRefid
, None))
370 ##############################################################################
371 ##############################################################################
373 class TextGatherer(nodes
.SparseNodeVisitor
):
374 """A visitor gathering text. This should be subclassed for more specialized
377 """Gathered text. May contain line feeds for several lines."""
380 def visit_generated(self
, node
):
381 raise nodes
.SkipNode()
383 def visit_Text(self
, node
):
384 self
.text
+= node
.astext()
386 ##############################################################################
388 class FirstTitleGatherer(TextGatherer
):
389 """A visitor gathering text in first title."""
393 def visit_title(self
, node
):
396 def depart_title(self
, node
):
397 raise nodes
.StopTraversal()
399 def visit_Text(self
, node
):
401 self
.text
+= node
.astext()
403 ##############################################################################
405 class TitlePathGatherer(TextGatherer
):
406 """A visitor gathering title text for all sections leading up to a certain
409 """Current title path."""
412 """The structural node we are looking for."""
413 _structuralNode
= None
415 def __init__(self
, document
, structuralNode
):
416 nodes
.SparseNodeVisitor
.__init
__(self
, document
)
418 if not isinstance(structuralNode
, nodes
.Structural
):
419 raise TypeError("Node looked for in `TitlePathGatherer` must be a structural node")
420 self
._structuralNode
= structuralNode
422 def visit_section(self
, node
):
423 visitor
= FirstTitleGatherer(self
.document
)
424 node
.walkabout(visitor
)
425 self
._titles
.append(visitor
.text
)
427 def depart_section(self
, node
):
428 if node
== self
._structuralNode
:
429 self
.text
= "\n".join(self
._titles
)
430 raise nodes
.StopTraversal()
434 ##############################################################################
435 ##############################################################################
437 class GraphRenderer(object):
438 """Abstract base class for graph renderers."""
440 """Command line settings."""
443 """Reverse the direction of the edges."""
446 """Unify multiple edges to a single one."""
449 """Render only connected nodes."""
450 _connectedOnly
= None
452 """The GraphTranslator currently rendered."""
455 def __init__(self
, settings
, reporter
):
456 self
._settings
= settings
457 self
.reporter
= reporter
458 self
._doReverse
= self
._settings
.reverse
459 self
._doUnify
= not self
._settings
.multiedge
460 self
._connectedOnly
= self
._settings
.connected_only
463 def getRenderer(settings
, reporter
):
464 """Factory method: Return the correct renderer according to the
467 return GxlRenderer(settings
, reporter
)
469 return DotRenderer(settings
, reporter
)
472 def render(self
, visitor
):
473 """Translate nodes and edges to a GXL DOM and return it as a XML
475 self
._visitor
= visitor
478 references
= self
.validReferences(self
._visitor
.references
)
479 for anchor
in self
.validAnchors(self
._visitor
.anchors
, references
):
480 self
.renderAnchor(anchor
)
481 for reference
in references
:
482 self
.renderReference(reference
)
489 """Prepare the rendering."""
493 """Finish the rendering and return the resulting string."""
494 raise NotImplementedError("Each subclass of `GraphRenderer` must implement `finish()`")
496 def renderAnchor(self
, anchor
):
497 """Render an anchor."""
498 raise NotImplementedError("Each subclass of `GraphRenderer` must implement `renderAnchor()`")
500 def renderReference(self
, reference
):
501 """Render a reference."""
502 raise NotImplementedError("Each subclass of `GraphRenderer` must implement `renderReference()`")
504 def validReferences(self
, references
):
505 """Checks references for valid ones and returns these."""
507 for reference
in references
:
508 add
= reference
.fromAnchor
and reference
.toAnchor
509 if add
and self
._doUnify
:
510 for unique
in valids
:
511 if (reference
.fromAnchor
== unique
.fromAnchor
and
512 reference
.toAnchor
== unique
.toAnchor
):
516 valids
.append(reference
)
519 def validAnchors(self
, anchors
, references
):
520 """Checks anchors for valid ones and returns these."""
521 if not self
._connectedOnly
:
523 usedAnchors
= ([ reference
.fromAnchor
524 for reference
in references
]
525 + [ reference
.toAnchor
526 for reference
in references
])
528 for anchor
in anchors
529 if anchor
in usedAnchors
]
531 ##############################################################################
533 class GxlRenderer(GraphRenderer
):
534 """Renderer fox GXL."""
536 """Indentation to use for output."""
539 """Newline to use for output."""
542 """The DOM document where the result is build."""
545 """The top node in the DOM."""
548 def __init__(self
, settings
, reporter
):
549 GraphRenderer
.__init
__(self
, settings
, reporter
)
550 if self
._settings
.indents
:
555 impl
= minidom
.getDOMImplementation()
556 doctype
= impl
.createDocumentType(GxlTagRoot
, None, GxlNamespace
)
557 self
._doc
= impl
.createDocument(None, GxlTagRoot
, doctype
)
558 self
._graph
= self
._doc
.createElement(GxlTagGraph
)
559 self
._doc
.documentElement
.appendChild(self
._graph
)
560 self
._graph
.setAttribute(GxlAttrId
, string2XMLName(self
._visitor
.sourceName
))
561 self
._graph
.setAttribute(GxlAttrEdgemode
, GxlValEdgemode
)
564 r
= self
._doc
.toprettyxml(self
._indent
, self
._newline
)
570 def renderAnchor(self
, anchor
):
571 eNode
= self
._doc
.createElement(GxlTagNode
)
572 self
._graph
.appendChild(eNode
)
573 eNode
.setAttribute(GxlAttrId
, anchor
.id())
575 eAttr
= self
._doc
.createElement(GxlTagAttr
)
576 eNode
.appendChild(eAttr
)
577 eAttr
.setAttribute(GxlAttrName
, GxlTagAttrTagName
)
579 eContent
= self
._doc
.createElement(GxlTagAttrTagNameTag
)
580 eAttr
.appendChild(eContent
)
581 eContent
.appendChild(self
._doc
.createTextNode(anchor
.name()))
583 def renderReference(self
, reference
):
584 if reference
.fromAnchor
is None:
585 # No anchor to start edge from
586 # TODO Should result in a warning
589 eEdge
= self
._doc
.createElement(GxlTagEdge
)
590 self
._graph
.appendChild(eEdge
)
591 fromAttr
= GxlAttrFrom
594 (fromAttr
, toAttr
) = (toAttr
, fromAttr
)
595 eEdge
.setAttribute(toAttr
, reference
.toAnchor
.id())
596 # TODO There should be several ways to identify the "from" node
597 eEdge
.setAttribute(fromAttr
, reference
.fromAnchor
.id())
599 ##############################################################################
601 class DotRenderer(GraphRenderer
):
602 """Renderer fox dot."""
604 """The AGraph used for rendering."""
607 def __init__(self
, settings
, reporter
):
608 GraphRenderer
.__init
__(self
, settings
, reporter
)
610 if not pygraphvizAvail
:
611 self
.reporter
.severe("Can not render dot format because module `pygraphviz` cannot be loaded")
614 self
._graph
= AGraph(strict
=False, directed
=True,
615 name
=self
._visitor
.sourceName
)
618 # TODO The ordering of nodes seems to be rather random in the output;
619 # however, the node ordering is used for the layout of the graph
620 # at least by `dot` and by `neato`; there should be a way to
621 # determine the ordering
622 r
= self
._graph
.string()
626 def renderAnchor(self
, anchor
):
627 self
._graph
.add_node(anchor
.id(), label
=anchor
.name())
629 def renderReference(self
, reference
):
630 if reference
.fromAnchor
is None:
631 # No anchor to start edge from
632 # TODO Should result in a warning
635 fromId
= reference
.fromAnchor
.id()
636 toId
= reference
.toAnchor
.id()
638 (fromId
, toId
) = (toId
, fromId
)
639 self
._graph
.add_edge(fromId
, toId
)
641 ##############################################################################
642 ##############################################################################
645 publish_cmdline(writer
=Writer(), description
=description
)