Fix [ 3601607 ] node.__repr__() must return `str` instance.
[docutils.git] / docutils / nodes.py
blob86d11362464b49c7aeb8dace94346bb7d069ba9a
1 # $Id$
2 # Author: David Goodger <goodger@python.org>
3 # Maintainer: docutils-develop@lists.sourceforge.net
4 # Copyright: This module has been placed in the public domain.
6 """
7 Docutils document tree element class library.
9 Classes in CamelCase are abstract base classes or auxiliary classes. The one
10 exception is `Text`, for a text (PCDATA) node; uppercase is used to
11 differentiate from element classes. Classes in lower_case_with_underscores
12 are element classes, matching the XML element generic identifiers in the DTD_.
14 The position of each node (the level at which it can occur) is significant and
15 is represented by abstract base classes (`Root`, `Structural`, `Body`,
16 `Inline`, etc.). Certain transformations will be easier because we can use
17 ``isinstance(node, base_class)`` to determine the position of the node in the
18 hierarchy.
20 .. _DTD: http://docutils.sourceforge.net/docs/ref/docutils.dtd
21 """
23 __docformat__ = 'reStructuredText'
25 import sys
26 import os
27 import re
28 import warnings
29 import types
30 import unicodedata
32 # ==============================
33 # Functional Node Base Classes
34 # ==============================
class Node(object):

    """Abstract base class of nodes in a document tree."""

    parent = None
    """The Node that directly contains this Node (back-reference)."""

    document = None
    """The `document` node at the root of this Node's tree."""

    source = None
    """Path or description of the input source that produced this Node."""

    line = None
    """1-based line number in `source` where this Node begins."""

    def __nonzero__(self):
        """
        Always report true, even for a node without subnodes.

        A node is more than a plain container, so its truth value does not
        depend on the number of children.  Use `len()` to test the node
        length and `None` to represent a false value.
        """
        return True

    if sys.version_info < (3,):
        # Python 2 only: str(node) is a byte string in which Unicode
        # characters > 255 are escaped; Python 3 needs no such method.
        def __str__(self):
            text = unicode(self)
            return text.encode('raw_unicode_escape')

    def asdom(self, dom=None):
        """
        Return a DOM **fragment** representation of this Node.

        `dom` is the DOM implementation module to use; when omitted,
        `xml.dom.minidom` is imported and used.
        """
        if dom is None:
            import xml.dom.minidom as dom
        return self._dom_node(dom.Document())

    def pformat(self, indent=' ', level=0):
        """
        Return an indented pseudo-XML representation (for test purposes).

        Subclasses must override this method.
        """
        raise NotImplementedError

    def copy(self):
        """Return a copy of self.  Subclasses must override."""
        raise NotImplementedError

    def deepcopy(self):
        """Return a copy of self including copies of all children.

        Subclasses must override.
        """
        raise NotImplementedError

    def setup_child(self, child):
        """
        Make `child` point back to this node.

        Sets ``child.parent``.  If this node belongs to a document, also
        sets ``child.document`` and fills in a missing ``child.source`` /
        ``child.line`` from the document's current source and line.
        """
        child.parent = self
        owner = self.document
        if not owner:
            return
        child.document = owner
        if child.source is None:
            child.source = owner.current_source
        if child.line is None:
            child.line = owner.current_line

    def walk(self, visitor):
        """
        Traverse the tree below (and including) this node, calling
        ``visitor.dispatch_visit()`` on entering each node.

        (`walkabout()` is similar but additionally calls
        ``dispatch_departure()`` before leaving each node.)

        Limited in-place modification of the tree is supported while
        walking: a node may be removed, or replaced by one or more nodes.
        If the removed or replaced node comes after the current node, the
        old node is still traversed and the new nodes are not.

        ``visit`` methods may raise `TreePruningException` subclasses
        (`SkipChildren`, `SkipSiblings`, `SkipNode`, `SkipDeparture`).

        Parameter `visitor`: a `NodeVisitor` object providing a ``visit``
        implementation for every `Node` subclass encountered.

        Return true if the traversal should be stopped.
        """
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walk calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except (SkipChildren, SkipNode):
                return False
            except SkipDeparture:
                # There is no departure call in walk(); nothing to skip.
                pass
            children = self.children
            try:
                # Iterate over a copy: visitors may modify the child list.
                for child in children[:]:
                    if child.walk(visitor):
                        return True
            except SkipSiblings:
                pass
        except StopTraversal:
            return True
        return False

    def walkabout(self, visitor):
        """
        Traverse the tree like `Node.walk()` (which see), but also call
        ``visitor.dispatch_departure()`` before leaving each node.

        Parameter `visitor`: a `NodeVisitor` object providing a ``visit``
        and a ``depart`` implementation for every `Node` subclass
        encountered.

        Return true if the traversal should be stopped.
        """
        depart = True
        halted = False
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walkabout calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except SkipNode:
                return halted
            except SkipDeparture:
                depart = False
            children = self.children
            try:
                # Iterate over a copy: visitors may modify the child list.
                for child in children[:]:
                    if child.walkabout(visitor):
                        halted = True
                        break
            except SkipSiblings:
                pass
        except SkipChildren:
            pass
        except StopTraversal:
            halted = True
        if depart:
            visitor.document.reporter.debug(
                'docutils.nodes.Node.walkabout calling dispatch_departure '
                'for %s' % self.__class__.__name__)
            visitor.dispatch_departure(self)
        return halted

    def _fast_traverse(self, cls):
        """Variant of traverse() that only supports instance checks."""
        found = []
        if isinstance(self, cls):
            found = [self]
        for child in self.children:
            found.extend(child._fast_traverse(cls))
        return found

    def _all_traverse(self):
        """Variant of traverse() that accepts every node (no condition)."""
        nodes = [self]
        for child in self.children:
            nodes.extend(child._all_traverse())
        return nodes

    def traverse(self, condition=None, include_self=True, descend=True,
                 siblings=False, ascend=False):
        """
        Return an iterable containing

        * self (if `include_self` is true)
        * all descendants in tree traversal order (if `descend` is true)
        * all siblings (if `siblings` is true) and their descendants (if
          also `descend` is true)
        * the siblings of the parent (if `ascend` is true) and their
          descendants (if also `descend` is true), and so on

        If `condition` is not None, only nodes for which
        ``condition(node)`` is true are included.  A node class ``cls``
        given as `condition` is equivalent to a function consisting of
        ``return isinstance(node, cls)``.

        A true `ascend` implies a true `siblings`.

        For example, given the following tree::

            <paragraph>
                <emphasis>      <--- emphasis.traverse() and
                    <strong>    <--- strong.traverse() are called.
                        Foo
                    Bar
                <reference name="Baz" refid="baz">
                    Baz

        Then list(emphasis.traverse()) equals ::

            [<emphasis>, <strong>, <#text: Foo>, <#text: Bar>]

        and list(strong.traverse(ascend=True)) equals ::

            [<strong>, <#text: Foo>, <#text: Bar>, <reference>, <#text: Baz>]
        """
        if ascend:
            siblings = True
        # Some argument combinations can be served by a specialized,
        # faster implementation.
        if include_self and descend and not siblings:
            if condition is None:
                return self._all_traverse()
            if isinstance(condition, (types.ClassType, type)):
                return self._fast_traverse(condition)
        # Turn a class given as `condition` into a predicate (`type` is
        # checked as well for Python implementations that only have
        # new-style classes, like PyPy).
        if isinstance(condition, (types.ClassType, type)):
            condition = (lambda node, node_class=condition:
                         isinstance(node, node_class))
        found = []
        if include_self:
            if condition is None or condition(self):
                found.append(self)
        if descend and len(self.children):
            for child in self:
                found.extend(child.traverse(condition=condition,
                                            include_self=True, descend=True,
                                            siblings=False, ascend=False))
        if siblings:
            current = self
            while current.parent:
                container = current.parent
                position = container.index(current)
                for sibling in container[position+1:]:
                    found.extend(sibling.traverse(condition=condition,
                                                  include_self=True,
                                                  descend=descend,
                                                  siblings=False,
                                                  ascend=False))
                if not ascend:
                    break
                current = container
        return found

    def next_node(self, condition=None, include_self=False, descend=True,
                  siblings=False, ascend=False):
        """
        Return the first node of the iterable returned by traverse(),
        or None if that iterable is empty.

        The parameters are those of traverse(), except that
        `include_self` defaults to False here.
        """
        candidates = self.traverse(condition=condition,
                                   include_self=include_self,
                                   descend=descend, siblings=siblings,
                                   ascend=ascend)
        try:
            first = candidates[0]
        except IndexError:
            first = None
        return first
if sys.version_info >= (3,):
    # No ``u`` prefix in repr() here, so the plain type is sufficient.
    reprunicode = unicode
else:
    class reprunicode(unicode):
        """
        A unicode sub-class whose repr() omits the leading ``u``.
        """

        def __repr__(self):
            # Drop the first character (the ``u`` prefix) of the
            # standard representation.
            standard = unicode.__repr__(self)
            return standard[1:]
def ensure_str(s):
    """
    Failsafe conversion of `unicode` to `str`.

    Under Python 2, a `unicode` argument is encoded to ASCII with
    non-ASCII characters replaced by backslash escapes.  Any other
    argument (and every argument under Python 3) is returned unchanged.
    """
    if sys.version_info >= (3,) or not isinstance(s, unicode):
        return s
    return s.encode('ascii', 'backslashreplace')
class Text(Node, reprunicode):

    """
    Terminal node (leaf) that contains text only.

    Text nodes have neither child nodes nor attributes.  Create one by
    passing a string to the constructor; retrieve the text with the
    `astext` method.
    """

    tagname = '#text'

    children = ()
    """Always empty: Text nodes cannot have children."""

    def __new__(cls, data, rawsource=None):
        """Prevent the rawsource argument from propagating to str."""
        # Byte strings are only rejected on Python 3.
        if sys.version_info > (3,) and isinstance(data, bytes):
            raise TypeError('expecting str data, not bytes')
        return reprunicode.__new__(cls, data)

    def __init__(self, data, rawsource=''):

        self.rawsource = rawsource
        """The raw text from which this element was constructed."""

    def shortrepr(self, maxlen=18):
        """
        Return ``<#text: ...>`` with the text abbreviated to `maxlen`
        characters (the last four being ``' ...'``) if it is longer.
        """
        text = self
        if len(text) > maxlen:
            text = text[:maxlen-4] + ' ...'
        return '<%s: %r>' % (self.tagname, reprunicode(text))

    def __repr__(self):
        return self.shortrepr(maxlen=68)

    def _dom_node(self, domroot):
        """Return a DOM text node created by `domroot` for this text."""
        return domroot.createTextNode(unicode(self))

    def astext(self):
        """Return the text as a `reprunicode` instance."""
        return reprunicode(self)

    # Note about __unicode__: neither this class nor the superclass Node
    # may define __unicode__ now that Text is a subclass of unicode
    # (instead of UserString).  unicode has no __unicode__ method to
    # delegate to, and calling unicode(self) or unicode.__new__ directly
    # would recurse infinitely.

    def copy(self):
        """Return a new instance with the same text and rawsource."""
        return self.__class__(reprunicode(self), rawsource=self.rawsource)

    def deepcopy(self):
        """Same as `copy()`; a Text node has no children to copy."""
        return self.copy()

    def pformat(self, indent=' ', level=0):
        """Return the text, every line prefixed with ``indent * level``
        and terminated by a newline."""
        prefix = indent * level
        return ''.join([prefix + line + '\n' for line in self.splitlines()])

    # rstrip and lstrip are used by substitution definitions, which expect
    # a Text instance as result (formerly provided by UserString).  Then
    # as now, the rawsource member is lost.

    def rstrip(self, chars=None):
        stripped = reprunicode.rstrip(self, chars)
        return self.__class__(stripped)

    def lstrip(self, chars=None):
        stripped = reprunicode.lstrip(self, chars)
        return self.__class__(stripped)
class Element(Node):

    """
    `Element` is the superclass to all specific elements.

    Elements contain attributes and child nodes.  Elements emulate
    dictionaries for attributes, indexing by attribute name (a string).  To
    set the attribute 'att' to 'value', do::

        element['att'] = 'value'

    There are two special attributes: 'ids' and 'names'.  Both are
    lists of unique identifiers, and names serve as human interfaces
    to IDs.  Names are case- and whitespace-normalized (see the
    fully_normalize_name() function), and IDs conform to the regular
    expression ``[a-z](-?[a-z0-9]+)*`` (see the make_id() function).

    Elements also emulate lists for child nodes (element nodes and/or text
    nodes), indexing by integer.  To get the first child node, use::

        element[0]

    Elements may be constructed using the ``+=`` operator.  To add one new
    child node to element, do::

        element += node

    This is equivalent to ``element.append(node)``.

    To add a list of multiple child nodes at once, use the same ``+=``
    operator::

        element += [node1, node2]

    This is equivalent to ``element.extend([node1, node2])``.
    """

    list_attributes = ('ids', 'classes', 'names', 'dupnames', 'backrefs')
    """List attributes, automatically initialized to empty lists for
    all nodes."""

    tagname = None
    """The element generic identifier. If None, it is set as an instance
    attribute to the name of the class."""

    child_text_separator = '\n\n'
    """Separator for child nodes, used by `astext()` method."""

    def __init__(self, rawsource='', *children, **attributes):
        """
        Store `rawsource`, adopt `children` and set up the attributes.

        Keyword arguments become attributes; their names are lowercased.
        Values for names in `list_attributes` are copied, so that the
        node does not share a mutable list with the caller.
        """
        self.rawsource = rawsource
        """The raw text from which this element was constructed."""

        self.children = []
        """List of child nodes (elements and/or `Text`)."""

        self.extend(children)           # maintain parent info

        self.attributes = {}
        """Dictionary of attribute {name: value}."""

        # Initialize list attributes.
        for att in self.list_attributes:
            self.attributes[att] = []

        for att, value in attributes.items():
            att = att.lower()
            if att in self.list_attributes:
                # mutable list; make a copy for this node
                self.attributes[att] = value[:]
            else:
                self.attributes[att] = value

        if self.tagname is None:
            self.tagname = self.__class__.__name__

    def _dom_node(self, domroot):
        """Return a DOM element (created by `domroot`) for this element,
        including its non-default attributes and all children."""
        element = domroot.createElement(self.tagname)
        for attribute, value in self.attlist():
            if isinstance(value, list):
                value = ' '.join([serial_escape('%s' % (v,)) for v in value])
            element.setAttribute(attribute, '%s' % value)
        for child in self.children:
            element.appendChild(child._dom_node(domroot))
        return element

    def __repr__(self):
        data = ''
        for c in self.children:
            data += c.shortrepr()
            if len(data) > 60:
                data = data[:56] + ' ...'
                break
        if self['names']:
            # ensure_str(): the names may be unicode objects.
            return '<%s "%s": %s>' % (self.__class__.__name__,
                        '; '.join([ensure_str(n) for n in self['names']]),
                        data)
        else:
            return '<%s: %s>' % (self.__class__.__name__, data)

    def shortrepr(self):
        """Return a short representation without the children."""
        if self['names']:
            return '<%s "%s"...>' % (self.__class__.__name__,
                        '; '.join([ensure_str(n) for n in self['names']]))
        else:
            return '<%s...>' % self.tagname

    def __unicode__(self):
        if self.children:
            return u'%s%s%s' % (self.starttag(),
                                ''.join([unicode(c) for c in self.children]),
                                self.endtag())
        else:
            return self.emptytag()

    if sys.version_info > (3,):
        # 2to3 doesn't convert __unicode__ to __str__
        __str__ = __unicode__

    def starttag(self, quoteattr=None):
        """
        Return the start tag with all non-default attributes.

        `quoteattr` is the function used to quote attribute values
        (default: `pseudo_quoteattr`).
        """
        # the optional arg is used by the docutils_xml writer
        if quoteattr is None:
            quoteattr = pseudo_quoteattr
        parts = [self.tagname]
        for name, value in self.attlist():
            if value is None:           # boolean attribute
                parts.append(name)
                continue
            if isinstance(value, list):
                values = [serial_escape('%s' % (v,)) for v in value]
                value = ' '.join(values)
            else:
                value = unicode(value)
            value = quoteattr(value)
            parts.append(u'%s=%s' % (name, value))
        return u'<%s>' % u' '.join(parts)

    def endtag(self):
        return '</%s>' % self.tagname

    def emptytag(self):
        return u'<%s/>' % u' '.join([self.tagname] +
                                    ['%s="%s"' % (n, v)
                                     for n, v in self.attlist()])

    def __len__(self):
        return len(self.children)

    def __contains__(self, key):
        """
        Support the ``in`` operator for attributes and for children.

        A string `key` is looked up in the attributes, anything else in
        the list of child nodes.
        """
        # (has_key is translated to "in" by 2to3)
        if isinstance(key, basestring):
            return key in self.attributes
        return key in self.children

    def __getitem__(self, key):
        """
        Return the attribute named `key` (a string), the child at index
        `key` (an integer) or the list of children in slice `key`.

        Raise `TypeError` for any other type of `key`.
        """
        if isinstance(key, basestring):
            return self.attributes[key]
        elif isinstance(key, int):
            return self.children[key]
        elif isinstance(key, slice):
            assert key.step in (None, 1), 'cannot handle slice with stride'
            return self.children[key.start:key.stop]
        else:
            raise TypeError('element index must be an integer, a slice, or '
                            'an attribute name string')

    def __setitem__(self, key, item):
        """
        Set an attribute (string `key`), replace a child (integer `key`)
        or replace a slice of children; new children are set up with
        `setup_child()`.

        Raise `TypeError` for any other type of `key`.
        """
        if isinstance(key, basestring):
            self.attributes[str(key)] = item
        elif isinstance(key, int):
            self.setup_child(item)
            self.children[key] = item
        elif isinstance(key, slice):
            assert key.step in (None, 1), 'cannot handle slice with stride'
            for node in item:
                self.setup_child(node)
            self.children[key.start:key.stop] = item
        else:
            raise TypeError('element index must be an integer, a slice, or '
                            'an attribute name string')

    def __delitem__(self, key):
        """
        Delete an attribute (string `key`), a child (integer `key`) or a
        slice of children.

        Raise `TypeError` for any other type of `key`.
        """
        if isinstance(key, basestring):
            del self.attributes[key]
        elif isinstance(key, int):
            del self.children[key]
        elif isinstance(key, slice):
            assert key.step in (None, 1), 'cannot handle slice with stride'
            del self.children[key.start:key.stop]
        else:
            raise TypeError('element index must be an integer, a simple '
                            'slice, or an attribute name string')

    def __add__(self, other):
        return self.children + other

    def __radd__(self, other):
        return other + self.children

    def __iadd__(self, other):
        """Append a node or a list of nodes to `self.children`."""
        if isinstance(other, Node):
            self.append(other)
        elif other is not None:
            self.extend(other)
        return self

    def astext(self):
        """Return the text of all children, joined with
        `child_text_separator`."""
        return self.child_text_separator.join(
              [child.astext() for child in self.children])

    def non_default_attributes(self):
        """Return a dictionary of the attributes for which
        `is_not_default()` is true."""
        atts = {}
        for key, value in self.attributes.items():
            if self.is_not_default(key):
                atts[key] = value
        return atts

    def attlist(self):
        """Return the non-default attributes as a sorted list of
        ``(name, value)`` pairs."""
        return sorted(self.non_default_attributes().items())

    def get(self, key, failobj=None):
        return self.attributes.get(key, failobj)

    def hasattr(self, attr):
        return attr in self.attributes

    def delattr(self, attr):
        if attr in self.attributes:
            del self.attributes[attr]

    def setdefault(self, key, failobj=None):
        return self.attributes.setdefault(key, failobj)

    has_key = hasattr

    # The ``in`` operator is supported by `__contains__()` above.  It must
    # not be aliased to `hasattr` here: that would discard the membership
    # test for child nodes.

    def get_language_code(self, fallback=''):
        """Return node's language tag.

        Look iteratively in self and parents for a class argument
        starting with ``language-`` and return the remainder of it
        (which should be a `BCP 47` language tag) or the `fallback`.
        """
        for cls in self.get('classes', []):
            if cls.startswith('language-'):
                return cls[9:]
        try:
            return self.parent.get_language_code(fallback)
        except AttributeError:
            # No parent (or a parent without this method): give up.
            return fallback

    def append(self, item):
        self.setup_child(item)
        self.children.append(item)

    def extend(self, item):
        for node in item:
            self.append(node)

    def insert(self, index, item):
        """Insert a node or a list of nodes before position `index`."""
        if isinstance(item, Node):
            self.setup_child(item)
            self.children.insert(index, item)
        elif item is not None:
            self[index:index] = item

    def pop(self, i=-1):
        return self.children.pop(i)

    def remove(self, item):
        self.children.remove(item)

    def index(self, item):
        return self.children.index(item)

    def is_not_default(self, key):
        """Return 0 if attribute `key` is an empty list attribute,
        1 otherwise."""
        if self[key] == [] and key in self.list_attributes:
            return 0
        else:
            return 1

    def update_basic_atts(self, dict):
        """
        Update basic attributes ('ids', 'names', 'classes',
        'dupnames', but not 'source') from node or dictionary `dict`.
        """
        if isinstance(dict, Node):
            dict = dict.attributes
        for att in ('ids', 'classes', 'names', 'dupnames'):
            for value in dict.get(att, []):
                if value not in self[att]:
                    self[att].append(value)

    def clear(self):
        self.children = []

    def replace(self, old, new):
        """Replace one child `Node` with another child or children."""
        index = self.index(old)
        if isinstance(new, Node):
            self.setup_child(new)
            self[index] = new
        elif new is not None:
            self[index:index+1] = new

    def replace_self(self, new):
        """
        Replace `self` node with `new`, where `new` is a node or a
        list of nodes.
        """
        update = new
        if not isinstance(new, Node):
            # `new` is a list; update first child.
            try:
                update = new[0]
            except IndexError:
                update = None
        if isinstance(update, Element):
            update.update_basic_atts(self)
        else:
            # `update` is a Text node or `new` is an empty list.
            # Assert that we aren't losing any attributes.
            for att in ('ids', 'names', 'classes', 'dupnames'):
                assert not self[att], \
                       'Losing "%s" attribute: %s' % (att, self[att])
        self.parent.replace(self, new)

    def first_child_matching_class(self, childclass, start=0, end=sys.maxint):
        """
        Return the index of the first child that is an instance of
        `childclass`, or None if there is no such child.

        Parameters:

        - `childclass`: A `Node` subclass to search for, or a tuple of `Node`
          classes. If a tuple, any of the classes may match.
        - `start`: Initial index to check.
        - `end`: Initial index to *not* check.
        """
        if not isinstance(childclass, tuple):
            childclass = (childclass,)
        for index in range(start, min(len(self), end)):
            for c in childclass:
                if isinstance(self[index], c):
                    return index
        return None

    def first_child_not_matching_class(self, childclass, start=0,
                                       end=sys.maxint):
        """
        Return the index of the first child that is *not* an instance of
        `childclass`, or None if there is no such child.

        Parameters:

        - `childclass`: A `Node` subclass to skip, or a tuple of `Node`
          classes. If a tuple, none of the classes may match.
        - `start`: Initial index to check.
        - `end`: Initial index to *not* check.
        """
        if not isinstance(childclass, tuple):
            childclass = (childclass,)
        for index in range(start, min(len(self), end)):
            for c in childclass:
                if isinstance(self.children[index], c):
                    break
            else:
                return index
        return None

    def pformat(self, indent=' ', level=0):
        """Return the start tag on a line indented by ``indent * level``,
        followed by the `pformat()` output of all children one level
        deeper."""
        return ''.join(['%s%s\n' % (indent * level, self.starttag())] +
                       [child.pformat(indent, level+1)
                        for child in self.children])

    def copy(self):
        """Return a new instance with the same rawsource and attributes
        but without children."""
        return self.__class__(rawsource=self.rawsource, **self.attributes)

    def deepcopy(self):
        """Return a copy of self with deep copies of all children."""
        copy = self.copy()
        copy.extend([child.deepcopy() for child in self.children])
        return copy

    def set_class(self, name):
        """Add a new class to the "classes" attribute."""
        warnings.warn('docutils.nodes.Element.set_class deprecated; '
                      "append to Element['classes'] list attribute directly",
                      DeprecationWarning, stacklevel=2)
        assert ' ' not in name
        self['classes'].append(name.lower())

    def note_referenced_by(self, name=None, id=None):
        """Note that this Element has been referenced by its name
        `name` or id `id`."""
        self.referenced = 1
        # Element.expect_referenced_by_* dictionaries map names or ids
        # to nodes whose ``referenced`` attribute is set to true as
        # soon as this node is referenced by the given name or id.
        # Needed for target propagation.
        by_name = getattr(self, 'expect_referenced_by_name', {}).get(name)
        by_id = getattr(self, 'expect_referenced_by_id', {}).get(id)
        if by_name:
            assert name is not None
            by_name.referenced = 1
        if by_id:
            assert id is not None
            by_id.referenced = 1
class TextElement(Element):

    """
    An element which directly contains text.

    All children are `Text` or `Inline` subclass nodes.  To find out
    whether an element occurs in an inline context, test whether its
    immediate parent is a `TextElement` instance (including subclasses).
    This is useful for nodes like `image`, which may appear inline as well
    as standalone body elements.

    When passing children to `__init__()`, be sure to set `text` to
    ``''`` or some other suitable value.
    """

    child_text_separator = ''
    """Separator for child nodes, used by `astext()` method."""

    def __init__(self, rawsource='', text='', *children, **attributes):
        # A non-empty `text` becomes a Text node placed before all other
        # children.
        if text != '':
            children = (Text(text),) + children
        Element.__init__(self, rawsource, *children, **attributes)
class FixedTextElement(TextElement):

    """An element which directly contains preformatted text."""

    def __init__(self, rawsource='', text='', *children, **attributes):
        TextElement.__init__(self, rawsource, text, *children, **attributes)
        # Always set, overriding any 'xml:space' passed by the caller.
        preserve_whitespace = {'xml:space': 'preserve'}
        self.attributes.update(preserve_whitespace)
845 # ========
846 # Mixins
847 # ========
class Resolvable:

    """Mixin providing the `resolved` flag (initially 0)."""

    resolved = 0


class BackLinkable:

    """Mixin for elements that record back-references."""

    def add_backref(self, refid):
        """Append `refid` to the 'backrefs' list attribute."""
        backrefs = self['backrefs']
        backrefs.append(refid)


# ====================
#  Element Categories
# ====================

class Root:
    """Category class `Root` (no behavior of its own)."""

class Titular:
    """Category class `Titular` (no behavior of its own)."""

class PreBibliographic:
    """Category of Node which may occur before Bibliographic Nodes."""

class Bibliographic:
    """Category class `Bibliographic` (no behavior of its own)."""

class Decorative(PreBibliographic):
    """Category class `Decorative`, a kind of `PreBibliographic`."""

class Structural:
    """Category class `Structural` (no behavior of its own)."""

class Body:
    """Category class `Body` (no behavior of its own)."""

class General(Body):
    """Category class `General`, a kind of `Body`."""

class Sequential(Body):
    """List-like elements."""

class Admonition(Body):
    """Category class `Admonition`, a kind of `Body`."""

class Special(Body):
    """Special internal body elements."""

class Invisible(PreBibliographic):
    """Internal elements that don't appear in output."""

class Part:
    """Category class `Part` (no behavior of its own)."""

class Inline:
    """Category class `Inline` (no behavior of its own)."""

class Referential(Resolvable):
    """Category class `Referential`; inherits the `Resolvable` mixin."""


class Targetable(Resolvable):

    """Category class `Targetable`; inherits the `Resolvable` mixin."""

    referenced = 0

    indirect_reference_name = None
    """Holds the whitespace_normalized_name (contains mixed case) of a target.
    Required for MoinMoin/reST compatibility."""


class Labeled:
    """Contains a `label` as its first element."""
912 # ==============
913 # Root Element
914 # ==============
916 class document(Root, Structural, Element):
919 The document root element.
921 Do not instantiate this class directly; use
922 `docutils.utils.new_document()` instead.
def __init__(self, settings, reporter, *args, **kwargs):
    """
    Initialize the element part with `args` and `kwargs` (passed on to
    `Element.__init__()`) and create the bookkeeping attributes of the
    document.  `settings` and `reporter` are stored as attributes.
    """
    Element.__init__(self, *args, **kwargs)

    self.current_source = None
    """Path to, or description of, the input source being processed."""

    self.current_line = None
    """1-based line number in `current_source`."""

    self.settings = settings
    """Data record with the runtime settings."""

    self.reporter = reporter
    """Generator of system messages."""

    self.indirect_targets = []
    """Indirect target nodes (list)."""

    self.substitution_defs = {}
    """Maps substitution names to substitution_definition nodes."""

    self.substitution_names = {}
    """Maps case-normalized substitution names to case-sensitive
    names."""

    self.refnames = {}
    """Maps names to lists of referencing nodes."""

    self.refids = {}
    """Maps ids to lists of referencing nodes."""

    self.nameids = {}
    """Maps names to unique id's."""

    self.nametypes = {}
    """Maps names to hyperlink type (boolean: True => explicit,
    False => implicit)."""

    self.ids = {}
    """Maps ids to nodes."""

    self.footnote_refs = {}
    """Maps footnote labels to lists of footnote_reference nodes."""

    self.citation_refs = {}
    """Maps citation labels to lists of citation_reference nodes."""

    self.autofootnotes = []
    """Auto-numbered footnote nodes (list)."""

    self.autofootnote_refs = []
    """Auto-numbered footnote_reference nodes (list)."""

    self.symbol_footnotes = []
    """Symbol footnote nodes (list)."""

    self.symbol_footnote_refs = []
    """Symbol footnote_reference nodes (list)."""

    self.footnotes = []
    """Manually-numbered footnote nodes (list)."""

    self.citations = []
    """Citation nodes (list)."""

    self.autofootnote_start = 1
    """First number used for auto-numbered footnotes."""

    self.symbol_footnote_start = 0
    """First symbol index used for symbol footnotes."""

    self.id_start = 1
    """First number used for generated IDs."""

    self.parse_messages = []
    """System messages generated during parsing."""

    self.transform_messages = []
    """System messages generated during the application of transforms."""

    # Imported here rather than at module level.
    from docutils.transforms import Transformer
    self.transformer = Transformer(self)
    """Stores the transforms to be applied to this document."""

    self.decoration = None
    """The `decoration` node of the document."""

    # The document is the root of its own tree.
    self.document = self
1014 def __getstate__(self):
1016 Return dict with unpicklable references removed.
1018 state = self.__dict__.copy()
1019 state['reporter'] = None
1020 state['transformer'] = None
1021 return state
def asdom(self, dom=None):
    """
    Return a DOM representation of this document.

    `dom` is the DOM implementation module to use; when omitted,
    `xml.dom.minidom` is imported and used.  The result is a DOM
    document whose child is the DOM node built by `_dom_node()`.
    """
    if dom is None:
        import xml.dom.minidom as dom
    dom_document = dom.Document()
    root_element = self._dom_node(dom_document)
    dom_document.appendChild(root_element)
    return dom_document
def set_id(self, node, msgnode=None):
    """
    Make sure that `node` has an ID, register it and return it.

    For every ID of `node` that is already registered in `self.ids` for
    a different node, a "severe" system message is generated and, if
    `msgnode` is given, added to `msgnode`.

    If `node` has no IDs, a new one is appended to ``node['ids']``: the
    first name-based ID (``settings.id_prefix`` + ``make_id(name)``) that
    is non-empty and unused, or else an auto-numbered ID
    (``id_prefix + auto_id_prefix + number``, advancing `self.id_start`).

    Only the returned ID (the new one, or the last entry of
    ``node['ids']``) is mapped to `node` in `self.ids` by this method.
    """
    node_id = None
    for node_id in node['ids']:
        if node_id in self.ids and self.ids[node_id] is not node:
            msg = self.reporter.severe('Duplicate ID: "%s".' % node_id)
            if msgnode is not None:
                msgnode += msg
    if not node['ids']:
        for name in node['names']:
            node_id = self.settings.id_prefix + make_id(name)
            if node_id and node_id not in self.ids:
                break
        else:
            # No usable name-based ID: generate an unused numbered one.
            node_id = ''
            while not node_id or node_id in self.ids:
                node_id = (self.settings.id_prefix +
                           self.settings.auto_id_prefix + str(self.id_start))
                self.id_start += 1
        node['ids'].append(node_id)
    self.ids[node_id] = node
    return node_id
def set_name_id_map(self, node, id, msgnode=None, explicit=None):
    """
    Update the name mappings for all names of `node`.

    `self.nameids` maps names to IDs, while `self.nametypes` maps names
    to booleans representing the hyperlink type (True==explicit,
    False==implicit).

    The following state transition table shows how `self.nameids` ("ids")
    and `self.nametypes` ("types") change with new input (a call to this
    method), and what actions are performed ("implicit"-type system
    messages are INFO/1, and "explicit"-type system messages are ERROR/3):

    ====  =====  ========  ========  =======  ====  =====  =====
     Old State    Input          Action        New State   Notes
    -----------  --------  -----------------  -----------  -----
    ids   types  new type  sys.msg.  dupname  ids   types
    ====  =====  ========  ========  =======  ====  =====  =====
    -     -      explicit  -         -        new   True
    -     -      implicit  -         -        new   False
    None  False  explicit  -         -        new   True
    old   False  explicit  implicit  old      new   True
    None  True   explicit  explicit  new      None  True
    old   True   explicit  explicit  new,old  None  True   [#]_
    None  False  implicit  implicit  new      None  False
    old   False  implicit  implicit  new,old  None  False
    None  True   implicit  implicit  new      None  True
    old   True   implicit  implicit  new      old   True
    ====  =====  ========  ========  =======  ====  =====  =====

    .. [#] Do not clear the name-to-id map or invalidate the old target if
       both old and new targets are external and refer to identical URIs.
       The new target is invalidated regardless.
    """
    for name in node['names']:
        if name not in self.nameids:
            # First occurrence of the name: just record it.
            self.nameids[name] = id
            self.nametypes[name] = explicit
        else:
            self.set_duplicate_name_id(node, id, name, msgnode, explicit)
def set_duplicate_name_id(self, node, id, name, msgnode, explicit):
    """
    Handle `name` of `node` clashing with an already registered name.

    Updates `self.nameids[name]` and `self.nametypes[name]`, marks the
    losing node(s) with `dupname()`, and reports the clash through
    `self.reporter`.  If `msgnode` is not None, the generated system
    message is added to it with ``+=``.

    Parameters:

    - `node`: the node carrying the duplicate `name`.
    - `id`: the ID of `node`; used as back reference of the message.
    - `name`: the duplicate name (must be a key of `self.nameids`).
    - `msgnode`: container receiving the system message, or None.
    - `explicit`: true if `node` is an explicit target.
    """
    old_id = self.nameids[name]
    old_explicit = self.nametypes[name]
    self.nametypes[name] = old_explicit or explicit
    if explicit:
        if old_explicit:
            # Two explicit targets: warn (level 2) by default.
            level = 2
            if old_id is not None:
                old_node = self.ids[old_id]
                if 'refuri' in node:
                    refuri = node['refuri']
                    if (old_node['names']
                        and 'refuri' in old_node
                        and old_node['refuri'] == refuri):
                        level = 1   # just inform if refuri's identical
                if level > 1:
                    dupname(old_node, name)
                    self.nameids[name] = None
            msg = self.reporter.system_message(
                level, 'Duplicate explicit target name: "%s".' % name,
                backrefs=[id], base_node=node)
            # Identity test: a message container must never be skipped
            # because of how it implements comparison.
            if msgnode is not None:
                msgnode += msg
            dupname(node, name)
        else:
            # The explicit target overrides the older implicit one.
            self.nameids[name] = id
            if old_id is not None:
                old_node = self.ids[old_id]
                dupname(old_node, name)
    else:
        if old_id is not None and not old_explicit:
            self.nameids[name] = None
            old_node = self.ids[old_id]
            dupname(old_node, name)
        dupname(node, name)
    if not explicit or (not old_explicit and old_id is not None):
        msg = self.reporter.info(
            'Duplicate implicit target name: "%s".' % name,
            backrefs=[id], base_node=node)
        if msgnode is not None:
            msgnode += msg
def has_name(self, name):
    """Tell whether `name` is a key of `self.nameids`."""
    known_names = self.nameids
    return name in known_names
1136 # "note" here is an imperative verb: "take note of".
def note_implicit_target(self, target, msgnode=None):
    """Give `target` an ID and map its names to it with ``explicit=None``."""
    target_id = self.set_id(target, msgnode)
    self.set_name_id_map(target, target_id, msgnode, explicit=None)
def note_explicit_target(self, target, msgnode=None):
    """Give `target` an ID and map its names to it with ``explicit=True``."""
    target_id = self.set_id(target, msgnode)
    self.set_name_id_map(target, target_id, msgnode, explicit=True)
def note_refname(self, node):
    """Append `node` to the `self.refnames` list for ``node['refname']``."""
    referrers = self.refnames.setdefault(node['refname'], [])
    referrers.append(node)
def note_refid(self, node):
    """Append `node` to the `self.refids` list for ``node['refid']``."""
    referrers = self.refids.setdefault(node['refid'], [])
    referrers.append(node)
def note_indirect_target(self, target):
    """
    Record `target` in `self.indirect_targets`.

    A target that has names is additionally passed to `note_refname()`.
    """
    self.indirect_targets.append(target)
    if not target['names']:
        return
    self.note_refname(target)
def note_anonymous_target(self, target):
    """Assign an ID to `target` via `set_id()`; nothing is returned."""
    # The ID returned by `set_id()` is deliberately not passed on.
    self.set_id(target)
def note_autofootnote(self, footnote):
    """Assign an ID to `footnote` and add it to `self.autofootnotes`."""
    self.set_id(footnote)
    registry = self.autofootnotes
    registry.append(footnote)
def note_autofootnote_ref(self, ref):
    """Assign an ID to `ref` and add it to `self.autofootnote_refs`."""
    self.set_id(ref)
    registry = self.autofootnote_refs
    registry.append(ref)
def note_symbol_footnote(self, footnote):
    """Assign an ID to `footnote` and add it to `self.symbol_footnotes`."""
    self.set_id(footnote)
    registry = self.symbol_footnotes
    registry.append(footnote)
def note_symbol_footnote_ref(self, ref):
    """Assign an ID to `ref` and add it to `self.symbol_footnote_refs`."""
    self.set_id(ref)
    registry = self.symbol_footnote_refs
    registry.append(ref)
def note_footnote(self, footnote):
    """Assign an ID to `footnote` and add it to `self.footnotes`."""
    self.set_id(footnote)
    registry = self.footnotes
    registry.append(footnote)
def note_footnote_ref(self, ref):
    """
    Register the footnote reference `ref`.

    Assigns an ID, files `ref` under ``ref['refname']`` in
    `self.footnote_refs`, and passes it to `note_refname()`.
    """
    self.set_id(ref)
    same_name_refs = self.footnote_refs.setdefault(ref['refname'], [])
    same_name_refs.append(ref)
    self.note_refname(ref)
def note_citation(self, citation):
    """Add `citation` to `self.citations` (no ID is assigned here)."""
    registry = self.citations
    registry.append(citation)
def note_citation_ref(self, ref):
    """
    Register the citation reference `ref`.

    Assigns an ID, files `ref` under ``ref['refname']`` in
    `self.citation_refs`, and passes it to `note_refname()`.
    """
    self.set_id(ref)
    same_name_refs = self.citation_refs.setdefault(ref['refname'], [])
    same_name_refs.append(ref)
    self.note_refname(ref)
def note_substitution_def(self, subdef, def_name, msgnode=None):
    """
    Register the substitution definition `subdef` under `def_name`.

    `def_name` is whitespace-normalized before use.  If a definition
    with the same normalized name exists already, an error is reported
    through `self.reporter` (and added to `msgnode` unless that is
    None), the old definition is marked with `dupname()`, and the new
    definition replaces it.
    """
    name = whitespace_normalize_name(def_name)
    if name in self.substitution_defs:
        msg = self.reporter.error(
            'Duplicate substitution definition name: "%s".' % name,
            base_node=subdef)
        # Identity test: only a missing container (None) suppresses
        # the message, whatever comparison the container implements.
        if msgnode is not None:
            msgnode += msg
        oldnode = self.substitution_defs[name]
        dupname(oldnode, name)
    # keep only the last definition:
    self.substitution_defs[name] = subdef
    # case-insensitive mapping:
    self.substitution_names[fully_normalize_name(name)] = name
def note_substitution_ref(self, subref, refname):
    """Store the whitespace-normalized `refname` in ``subref['refname']``."""
    normalized = whitespace_normalize_name(refname)
    subref['refname'] = normalized
def note_pending(self, pending, priority=None):
    """Forward `pending` and `priority` to ``self.transformer.add_pending``."""
    transformer = self.transformer
    transformer.add_pending(pending, priority)
def note_parse_message(self, message):
    """Add `message` to `self.parse_messages`."""
    collected = self.parse_messages
    collected.append(message)
def note_transform_message(self, message):
    """Add `message` to `self.transform_messages`."""
    collected = self.transform_messages
    collected.append(message)
def note_source(self, source, offset):
    """
    Remember the current input position.

    `source` is stored in `self.current_source`.  `self.current_line`
    becomes ``offset + 1``, or None if `offset` is None.
    """
    line_number = offset
    if offset is not None:
        line_number = offset + 1
    self.current_source = source
    self.current_line = line_number
def copy(self):
    """
    Return a new instance of the same class.

    The instance is created from `self.settings`, `self.reporter` and the
    entries of `self.attributes` as keyword arguments.
    """
    cls = self.__class__
    return cls(self.settings, self.reporter, **self.attributes)
def get_decoration(self):
    """
    Return `self.decoration`, creating and inserting it on first use.

    A new `decoration` element is inserted at the index returned by
    ``self.first_child_not_matching_class(Titular)``, or appended if that
    index is None.
    """
    if self.decoration:
        return self.decoration
    self.decoration = decoration()
    position = self.first_child_not_matching_class(Titular)
    if position is None:
        self.append(self.decoration)
    else:
        self.insert(position, self.decoration)
    return self.decoration
1241 # ================
1242 # Title Elements
1243 # ================
# Titular text elements; `title` and `subtitle` are also PreBibliographic.
class title(Titular, PreBibliographic, TextElement):
    pass


class subtitle(Titular, PreBibliographic, TextElement):
    pass


class rubric(Titular, TextElement):
    pass
1250 # ========================
1251 # Bibliographic Elements
1252 # ========================
# Bibliographic containers (plain `Element`).
class docinfo(Bibliographic, Element):
    pass


class authors(Bibliographic, Element):
    pass


# Bibliographic text elements.
class author(Bibliographic, TextElement):
    pass


class organization(Bibliographic, TextElement):
    pass


class address(Bibliographic, FixedTextElement):
    pass


class contact(Bibliographic, TextElement):
    pass


class version(Bibliographic, TextElement):
    pass


class revision(Bibliographic, TextElement):
    pass


class status(Bibliographic, TextElement):
    pass


class date(Bibliographic, TextElement):
    pass


class copyright(Bibliographic, TextElement):
    pass
1267 # =====================
1268 # Decorative Elements
1269 # =====================
class decoration(Decorative, Element):

    """Decorative element with accessors for a `header` and a `footer`."""

    def get_header(self):
        """Return the first child, inserting a `header` if it is none."""
        kids = self.children
        if not (len(kids) and isinstance(kids[0], header)):
            self.insert(0, header())
        return self.children[0]

    def get_footer(self):
        """Return the last child, appending a `footer` if it is none."""
        kids = self.children
        if not (len(kids) and isinstance(kids[-1], footer)):
            self.append(footer())
        return self.children[-1]
# Children managed by `decoration.get_header()` / `get_footer()`.
class header(Decorative, Element):
    pass


class footer(Decorative, Element):
    pass
1288 # =====================
1289 # Structural Elements
1290 # =====================
class section(Structural, Element):
    pass
class topic(Structural, Element):

    """
    Topics are terminal, "leaf" mini-sections, like block quotes with titles,
    or textual figures.  A topic is just like a section, except that it has no
    subsections, and it doesn't have to conform to section placement rules.

    Topics are allowed wherever body elements (list, table, etc.) are allowed,
    but only at the top level of a section or document.  Topics cannot nest
    inside topics, sidebars, or body elements; you can't have a topic inside a
    table, list, block quote, etc.
    """
class sidebar(Structural, Element):

    """
    Sidebars are like miniature, parallel documents that occur inside other
    documents, providing related or reference material.  A sidebar is
    typically offset by a border and "floats" to the side of the page; the
    document's main text may flow around it.  Sidebars can also be likened to
    super-footnotes; their content is outside of the flow of the document's
    main text.

    Sidebars are allowed wherever body elements (list, table, etc.) are
    allowed, but only at the top level of a section or document.  Sidebars
    cannot nest inside sidebars, topics, or body elements; you can't have a
    sidebar inside a table, list, block quote, etc.
    """
class transition(Structural, Element):
    pass
1329 # ===============
1330 # Body Elements
1331 # ===============
# General body elements.
class paragraph(General, TextElement):
    pass


class compound(General, Element):
    pass


class container(General, Element):
    pass


# Lists and their parts.
class bullet_list(Sequential, Element):
    pass


class enumerated_list(Sequential, Element):
    pass


class list_item(Part, Element):
    pass


class definition_list(Sequential, Element):
    pass


class definition_list_item(Part, Element):
    pass


class term(Part, TextElement):
    pass


class classifier(Part, TextElement):
    pass


class definition(Part, Element):
    pass


class field_list(Sequential, Element):
    pass


class field(Part, Element):
    pass


class field_name(Part, TextElement):
    pass


class field_body(Part, Element):
    pass
class option(Part, Element):

    # Overrides the separator attribute with the empty string.
    child_text_separator = ''
class option_argument(Part, TextElement):

    def astext(self):
        """Return the text prefixed by the 'delimiter' attribute."""
        # A single space is used when no 'delimiter' attribute is set.
        prefix = self.get('delimiter', ' ')
        return prefix + TextElement.astext(self)
class option_group(Part, Element):

    # Overrides the separator attribute with comma + space.
    child_text_separator = ', '
class option_list(Sequential, Element):
    pass
class option_list_item(Part, Element):

    # NOTE(review): literal reproduced as found; whitespace runs may have
    # been collapsed when this listing was extracted -- confirm upstream.
    child_text_separator = ' '
class option_string(Part, TextElement):
    pass


class description(Part, Element):
    pass


# Blocks based on `FixedTextElement`.
class literal_block(General, FixedTextElement):
    pass


class doctest_block(General, FixedTextElement):
    pass


class math_block(General, FixedTextElement):
    pass


class line_block(General, Element):
    pass
class line(Part, TextElement):

    # Class-level default; no indentation is recorded initially.
    indent = None
class block_quote(General, Element):
    pass


class attribution(Part, TextElement):
    pass


# Admonitions.
class attention(Admonition, Element):
    pass


class caution(Admonition, Element):
    pass


class danger(Admonition, Element):
    pass


class error(Admonition, Element):
    pass


class important(Admonition, Element):
    pass


class note(Admonition, Element):
    pass


class tip(Admonition, Element):
    pass


class hint(Admonition, Element):
    pass


class warning(Admonition, Element):
    pass


class admonition(Admonition, Element):
    pass


# Special, invisible elements.
class comment(Special, Invisible, FixedTextElement):
    pass


class substitution_definition(Special, Invisible, TextElement):
    pass


class target(Special, Invisible, Inline, TextElement, Targetable):
    pass


# Labeled, back-linkable targets.
class footnote(General, BackLinkable, Element, Labeled, Targetable):
    pass


class citation(General, BackLinkable, Element, Labeled, Targetable):
    pass


class label(Part, TextElement):
    pass


# Figures.
class figure(General, Element):
    pass


class caption(Part, TextElement):
    pass


class legend(Part, Element):
    pass


# Tables and their parts.
class table(General, Element):
    pass


class tgroup(Part, Element):
    pass


class colspec(Part, Element):
    pass


class thead(Part, Element):
    pass


class tbody(Part, Element):
    pass


class row(Part, Element):
    pass


class entry(Part, Element):
    pass
class system_message(Special, BackLinkable, PreBibliographic, Element):

    """
    System message element.

    Do not instantiate this class directly; use
    ``document.reporter.info/warning/error/severe()`` instead.
    """

    def __init__(self, message=None, *children, **attributes):
        """
        Initialize the element.

        A non-empty `message` is wrapped in a `paragraph` that becomes the
        first child, followed by `children`.  `attributes` are passed on
        to `Element.__init__()`.
        """
        if message:
            p = paragraph('', message)
            children = (p,) + children
        try:
            Element.__init__(self, '', *children, **attributes)
        except:
            # Debugging aid only: the exception is re-raised unchanged.
            # The call form with one argument prints the same text under
            # Python 2 and remains valid syntax under Python 3.
            print('system_message: children=%r' % (children,))
            raise

    def astext(self):
        """
        Return "source:line: (type/level) text".

        The 'source', 'type' and 'level' attributes must be set; a
        missing 'line' attribute is rendered as the empty string.
        """
        line = self.get('line', '')
        return u'%s:%s: (%s/%s) %s' % (self['source'], line, self['type'],
                                       self['level'], Element.astext(self))
class pending(Special, Invisible, Element):

    """
    The "pending" element is used to encapsulate a pending operation: the
    operation (transform), the point at which to apply it, and any data it
    requires.  Only the pending operation's location within the document is
    stored in the public document tree (by the "pending" object itself); the
    operation and its data are stored in the "pending" object's internal
    instance attributes.

    For example, say you want a table of contents in your reStructuredText
    document.  The easiest way to specify where to put it is from within the
    document, with a directive::

        .. contents::

    But the "contents" directive can't do its work until the entire document
    has been parsed and possibly transformed to some extent.  So the directive
    code leaves a placeholder behind that will trigger the second phase of its
    processing, something like this::

        <pending ...public attributes...> + internal attributes

    Use `document.note_pending()` so that the
    `docutils.transforms.Transformer` stage of processing can run all pending
    transforms.
    """

    def __init__(self, transform, details=None,
                 rawsource='', *children, **attributes):
        Element.__init__(self, rawsource, *children, **attributes)

        self.transform = transform
        """The `docutils.transforms.Transform` class implementing the pending
        operation."""

        self.details = details or {}
        """Detail data (dictionary) required by the pending operation."""

    def pformat(self, indent=' ', level=0):
        """
        Return the `Element.pformat()` output followed by a listing of the
        internal attributes (`transform` and the sorted `details` items).

        Detail values that are a `Node`, or a non-empty list starting with
        a `Node`, are listed via their own ``pformat()``; all other values
        are shown with ``%r``.
        """
        # NOTE(review): whitespace inside the literals below (and in the
        # `indent` default) is reproduced as found; runs of spaces may have
        # been collapsed when this listing was extracted -- confirm upstream.
        internals = [
            '.. internal attributes:',
            ' .transform: %s.%s' % (self.transform.__module__,
                                    self.transform.__name__),
            ' .details:']
        # `sorted()` also works on Python 3, where `dict.items()` returns
        # a view that has no `sort()` method.
        details = sorted(self.details.items())
        for key, value in details:
            if isinstance(value, Node):
                internals.append('%7s%s:' % ('', key))
                internals.extend(['%9s%s' % ('', line)
                                  for line in value.pformat().splitlines()])
            elif value and isinstance(value, list) \
                     and isinstance(value[0], Node):
                internals.append('%7s%s:' % ('', key))
                for v in value:
                    internals.extend(['%9s%s' % ('', line)
                                      for line in v.pformat().splitlines()])
            else:
                internals.append('%7s%s: %r' % ('', key, value))
        return (Element.pformat(self, indent, level)
                + ''.join([(' %s%s\n' % (indent * level, line))
                           for line in internals]))

    def copy(self):
        """
        Return a new instance built from the same transform, details,
        rawsource and attributes (the details object is shared, not copied).
        """
        return self.__class__(self.transform, self.details, self.rawsource,
                              **self.attributes)
class raw(Special, Inline, PreBibliographic, FixedTextElement):

    """
    Raw data that is to be passed untouched to the Writer.
    """
1520 # =================
1521 # Inline Elements
1522 # =================
# Inline markup.
class emphasis(Inline, TextElement):
    pass


class strong(Inline, TextElement):
    pass


class literal(Inline, TextElement):
    pass


# References.
class reference(General, Inline, Referential, TextElement):
    pass


class footnote_reference(Inline, Referential, TextElement):
    pass


class citation_reference(Inline, Referential, TextElement):
    pass


class substitution_reference(Inline, TextElement):
    pass


class title_reference(Inline, TextElement):
    pass


# Further inline text elements.
class abbreviation(Inline, TextElement):
    pass


class acronym(Inline, TextElement):
    pass


class superscript(Inline, TextElement):
    pass


class subscript(Inline, TextElement):
    pass


class math(Inline, TextElement):
    pass
class image(General, Inline, Element):

    def astext(self):
        """Return the 'alt' attribute, or '' if it is not set."""
        alternative_text = self.get('alt', '')
        return alternative_text
class inline(Inline, TextElement):
    pass


class problematic(Inline, TextElement):
    pass


class generated(Inline, TextElement):
    pass
1550 # ========================================
1551 # Auxiliary Classes, Functions, and Data
1552 # ========================================
# A list of names of all concrete Node subclasses.
node_class_names = """
    Text
    abbreviation acronym address admonition attention attribution author
        authors
    block_quote bullet_list
    caption caution citation citation_reference classifier colspec comment
        compound contact container copyright
    danger date decoration definition definition_list definition_list_item
        description docinfo doctest_block document
    emphasis entry enumerated_list error
    field field_body field_list field_name figure footer
        footnote footnote_reference
    generated
    header hint
    image important inline
    label legend line line_block list_item literal literal_block
    math math_block
    note
    option option_argument option_group option_list option_list_item
        option_string organization
    paragraph pending problematic
    raw reference revision row rubric
    section sidebar status strong subscript substitution_definition
        substitution_reference subtitle superscript system_message
    table target tbody term tgroup thead tip title title_reference topic
        transition
    version
    warning""".split()
class NodeVisitor:

    """
    "Visitor" pattern [GoF95]_ abstract superclass implementation for
    document tree traversals.

    Each node class has corresponding methods, doing nothing by
    default; override individual methods for specific and useful
    behaviour.  The `dispatch_visit()` method is called by
    `Node.walk()` upon entering a node.  `Node.walkabout()` also calls
    the `dispatch_departure()` method before exiting a node.

    The dispatch methods call "``visit_`` + node class name" or
    "``depart_`` + node class name", resp.

    This is a base class for visitors whose ``visit_...`` & ``depart_...``
    methods should be implemented for *all* node types encountered (such as
    for `docutils.writers.Writer` subclasses).  Unimplemented methods will
    raise exceptions.

    For sparse traversals, where only certain node types are of interest,
    subclass `SparseNodeVisitor` instead.  When (mostly or entirely) uniform
    processing is desired, subclass `GenericNodeVisitor`.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    # Tuple containing node class names (as strings).
    #
    # No exception will be raised if writers do not implement visit
    # or departure functions for these node classes.
    #
    # Used to ensure transitional compatibility with existing 3rd-party
    # writers.
    optional = ()

    def __init__(self, document):
        self.document = document

    def dispatch_visit(self, node):
        """
        Look up and call the ``visit_...`` method for the class of `node`.

        Falls back to `self.unknown_visit` if no such method exists.  The
        chosen method is reported via ``self.document.reporter.debug``.
        """
        class_name = node.__class__.__name__
        handler = getattr(self, 'visit_' + class_name, self.unknown_visit)
        debug_text = (
            'docutils.nodes.NodeVisitor.dispatch_visit calling %s for %s'
            % (handler.__name__, class_name))
        self.document.reporter.debug(debug_text)
        return handler(node)

    def dispatch_departure(self, node):
        """
        Look up and call the ``depart_...`` method for the class of `node`.

        Falls back to `self.unknown_departure` if no such method exists.
        The chosen method is reported via ``self.document.reporter.debug``.
        """
        class_name = node.__class__.__name__
        handler = getattr(self, 'depart_' + class_name,
                          self.unknown_departure)
        debug_text = (
            'docutils.nodes.NodeVisitor.dispatch_departure calling %s for %s'
            % (handler.__name__, class_name))
        self.document.reporter.debug(debug_text)
        return handler(node)

    def unknown_visit(self, node):
        """
        Called when entering unknown `Node` types.

        Raise `NotImplementedError` unless overridden; node classes listed
        in `optional` are tolerated when ``strict_visitor`` is off.
        """
        class_name = node.__class__.__name__
        if (not self.document.settings.strict_visitor
            and class_name in self.optional):
            return
        raise NotImplementedError(
            '%s visiting unknown node type: %s'
            % (self.__class__, class_name))

    def unknown_departure(self, node):
        """
        Called before exiting unknown `Node` types.

        Raise `NotImplementedError` unless overridden; node classes listed
        in `optional` are tolerated when ``strict_visitor`` is off.
        """
        class_name = node.__class__.__name__
        if (not self.document.settings.strict_visitor
            and class_name in self.optional):
            return
        raise NotImplementedError(
            '%s departing unknown node type: %s'
            % (self.__class__, class_name))
class SparseNodeVisitor(NodeVisitor):

    """
    Base class for sparse traversals, where only certain node types are of
    interest.  When ``visit_...`` & ``depart_...`` methods should be
    implemented for *all* node types (such as for `docutils.writers.Writer`
    subclasses), subclass `NodeVisitor` instead.
    """
class GenericNodeVisitor(NodeVisitor):

    """
    Generic "Visitor" abstract superclass, for simple traversals.

    Unless overridden, each ``visit_...`` method calls `default_visit()`, and
    each ``depart_...`` method (when using `Node.walkabout()`) calls
    `default_departure()`.  `default_visit()` (and `default_departure()`) must
    be overridden in subclasses.

    Define fully generic visitors by overriding `default_visit()` (and
    `default_departure()`) only.  Define semi-generic visitors by overriding
    individual ``visit_...()`` (and ``depart_...()``) methods also.

    `NodeVisitor.unknown_visit()` (`NodeVisitor.unknown_departure()`) should
    be overridden for default behavior.
    """

    def default_visit(self, node):
        """Override for generic, uniform traversals."""
        raise NotImplementedError()

    def default_departure(self, node):
        """Override for generic, uniform traversals."""
        raise NotImplementedError()
def _call_default_visit(self, node):
    """Delegate to ``self.default_visit(node)``; the result is discarded."""
    visit = self.default_visit
    visit(node)
def _call_default_departure(self, node):
    """Delegate to ``self.default_departure(node)``; discard the result."""
    depart = self.default_departure
    depart(node)
def _nop(self, node):
    """Do nothing and return None."""
    return None
def _add_node_class_names(names):
    """Save typing with dynamic assignments:"""
    # For every node class name, `GenericNodeVisitor` gets the delegating
    # ``visit_``/``depart_`` methods and `SparseNodeVisitor` gets no-ops.
    for _name in names:
        visit_attr = "visit_" + _name
        depart_attr = "depart_" + _name
        setattr(GenericNodeVisitor, visit_attr, _call_default_visit)
        setattr(GenericNodeVisitor, depart_attr, _call_default_departure)
        setattr(SparseNodeVisitor, visit_attr, _nop)
        setattr(SparseNodeVisitor, depart_attr, _nop)

_add_node_class_names(node_class_names)
class TreeCopyVisitor(GenericNodeVisitor):

    """
    Make a complete copy of a tree or branch, including element attributes.
    """

    def __init__(self, document):
        GenericNodeVisitor.__init__(self, document)
        # Stack of the acting parents that were current before descending.
        self.parent_stack = []
        # Acting parent; starts as a plain list collecting the root copy.
        self.parent = []

    def get_tree_copy(self):
        """Return the first item of the acting parent (``self.parent[0]``)."""
        return self.parent[0]

    def default_visit(self, node):
        """Copy the current node, and make it the new acting parent."""
        duplicate = node.copy()
        previous_parent = self.parent
        previous_parent.append(duplicate)
        self.parent_stack.append(previous_parent)
        self.parent = duplicate

    def default_departure(self, node):
        """Restore the previous acting parent."""
        self.parent = self.parent_stack.pop()
class TreePruningException(Exception):

    """
    Base class for `NodeVisitor`-related tree pruning exceptions.

    Raise subclasses from within ``visit_...`` or ``depart_...`` methods
    called from `Node.walk()` and `Node.walkabout()` tree traversals to prune
    the tree traversed.
    """
class SkipChildren(TreePruningException):

    """
    Do not visit any children of the current node.  The current node's
    siblings and ``depart_...`` method are not affected.
    """
class SkipSiblings(TreePruningException):

    """
    Do not visit any more siblings (to the right) of the current node.  The
    current node's children and its ``depart_...`` method are not affected.
    """
class SkipNode(TreePruningException):

    """
    Do not visit the current node's children, and do not call the current
    node's ``depart_...`` method.
    """
class SkipDeparture(TreePruningException):

    """
    Do not call the current node's ``depart_...`` method.  The current node's
    children and siblings are not affected.
    """
class NodeFound(TreePruningException):

    """
    Raise to indicate that the target of a search has been found.  This
    exception must be caught by the client; it is not caught by the traversal
    code.
    """
class StopTraversal(TreePruningException):

    """
    Stop the traversal altogether.  The current node's ``depart_...`` method
    is not affected.  The parent nodes' ``depart_...`` methods are also called
    as usual.  No other nodes are visited.  This is an alternative to
    NodeFound that does not cause exception handling to trickle up to the
    caller.
    """
def make_id(string):
    """
    Convert `string` into an identifier and return it.

    Docutils identifiers will conform to the regular expression
    ``[a-z](-?[a-z0-9]+)*``.  For CSS compatibility, identifiers (the "class"
    and "id" attributes) should have no underscores, colons, or periods.
    Hyphens may be used.

    - The `HTML 4.01 spec`_ defines identifiers based on SGML tokens:

          ID and NAME tokens must begin with a letter ([A-Za-z]) and may be
          followed by any number of letters, digits ([0-9]), hyphens ("-"),
          underscores ("_"), colons (":"), and periods (".").

    - However the `CSS1 spec`_ defines identifiers based on the "name" token,
      a tighter interpretation ("flex" tokenizer notation; "latin1" and
      "escape" 8-bit characters have been replaced with entities)::

          unicode     \\[0-9a-f]{1,4}
          latin1      [&iexcl;-&yuml;]
          escape      {unicode}|\\[ -~&iexcl;-&yuml;]
          nmchar      [-a-z0-9]|{latin1}|{escape}
          name        {nmchar}+

    The CSS1 "nmchar" rule does not include underscores ("_"), colons (":"),
    or periods ("."), therefore "class" and "id" attributes should not contain
    these characters.  They should be replaced with hyphens ("-").  Combined
    with HTML's requirements (the first character must be a letter; no
    "unicode", "latin1", or "escape" characters), this results in the
    ``[a-z](-?[a-z0-9]+)*`` pattern.

    .. _HTML 4.01 spec: http://www.w3.org/TR/html401
    .. _CSS1 spec: http://www.w3.org/TR/REC-CSS1
    """
    ident = string.lower()
    if not isinstance(ident, unicode):
        ident = ident.decode()
    # Replace characters that have no NFKD decomposition to ASCII:
    # first the digraphs/ligatures, then the single letters.
    for table in (_non_id_translate_digraphs, _non_id_translate):
        ident = ident.translate(table)
    # get rid of non-ascii characters.
    # 'ascii' lowercase to prevent problems with turkish locale.
    decomposed = unicodedata.normalize('NFKD', ident)
    ident = decomposed.encode('ascii', 'ignore').decode('ascii')
    # shrink runs of whitespace and replace by hyphen
    collapsed = ' '.join(ident.split())
    ident = _non_id_chars.sub('-', collapsed)
    # strip leading hyphens/digits and trailing hyphens
    ident = _non_id_at_ends.sub('', ident)
    return str(ident)
# Runs of characters that are not allowed inside an identifier.
_non_id_chars = re.compile('[^a-z0-9]+')
# Leading hyphens/digits and trailing hyphens.
_non_id_at_ends = re.compile('^[-0-9]+|-+$')

# Code point -> single-letter replacement.
_non_id_translate = {
    0x00f8: u'o',       # o with stroke
    0x0111: u'd',       # d with stroke
    0x0127: u'h',       # h with stroke
    0x0131: u'i',       # dotless i
    0x0142: u'l',       # l with stroke
    0x0167: u't',       # t with stroke
    0x0180: u'b',       # b with stroke
    0x0183: u'b',       # b with topbar
    0x0188: u'c',       # c with hook
    0x018c: u'd',       # d with topbar
    0x0192: u'f',       # f with hook
    0x0199: u'k',       # k with hook
    0x019a: u'l',       # l with bar
    0x019e: u'n',       # n with long right leg
    0x01a5: u'p',       # p with hook
    0x01ab: u't',       # t with palatal hook
    0x01ad: u't',       # t with hook
    0x01b4: u'y',       # y with hook
    0x01b6: u'z',       # z with stroke
    0x01e5: u'g',       # g with stroke
    0x0225: u'z',       # z with hook
    0x0234: u'l',       # l with curl
    0x0235: u'n',       # n with curl
    0x0236: u't',       # t with curl
    0x0237: u'j',       # dotless j
    0x023c: u'c',       # c with stroke
    0x023f: u's',       # s with swash tail
    0x0240: u'z',       # z with swash tail
    0x0247: u'e',       # e with stroke
    0x0249: u'j',       # j with stroke
    0x024b: u'q',       # q with hook tail
    0x024d: u'r',       # r with stroke
    0x024f: u'y',       # y with stroke
}

# Code point -> two-letter replacement.
_non_id_translate_digraphs = {
    0x00df: u'sz',      # ligature sz
    0x00e6: u'ae',      # ae
    0x0153: u'oe',      # ligature oe
    0x0238: u'db',      # db digraph
    0x0239: u'qp',      # qp digraph
}
def dupname(node, name):
    """
    Move `name` from ``node['names']`` to ``node['dupnames']``.

    Also sets ``node.referenced`` to 1.
    """
    duplicates = node['dupnames']
    duplicates.append(name)
    valid_names = node['names']
    valid_names.remove(name)
    # Assume that this method is referenced, even though it isn't; we
    # don't want to throw unnecessary system_messages.
    node.referenced = 1
def fully_normalize_name(name):
    """Return a case- and whitespace-normalized name."""
    words = name.lower().split()
    return ' '.join(words)
def whitespace_normalize_name(name):
    """Return a whitespace-normalized name."""
    words = name.split()
    return ' '.join(words)
def serial_escape(value):
    """Escape string values that are elements of a list, for serialization."""
    # Backslashes must be doubled first, so that the backslashes added
    # in front of spaces are not escaped again.
    escaped = value.replace('\\', r'\\')
    escaped = escaped.replace(' ', r'\ ')
    return escaped
def pseudo_quoteattr(value):
    """Quote attributes for pseudo-xml"""
    # No escaping is performed; `value` is only wrapped in double quotes.
    template = '"%s"'
    return template % value
1954 # \f
1956 # Local Variables:
1957 # indent-tabs-mode: nil
1958 # sentence-end-double-space: t
1959 # fill-column: 78
1960 # End: