Cleanup: Use True/False for boolean values
[docutils.git] / docutils / nodes.py
blob559253ff809342ed25b52bebe4546489ee88e09b
1 # $Id$
2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
5 """
6 Docutils document tree element class library.
8 Classes in CamelCase are abstract base classes or auxiliary classes. The one
9 exception is `Text`, for a text (PCDATA) node; uppercase is used to
10 differentiate from element classes. Classes in lower_case_with_underscores
11 are element classes, matching the XML element generic identifiers in the DTD_.
13 The position of each node (the level at which it can occur) is significant and
14 is represented by abstract base classes (`Root`, `Structural`, `Body`,
15 `Inline`, etc.). Certain transformations will be easier because we can use
16 ``isinstance(node, base_class)`` to determine the position of the node in the
17 hierarchy.
19 .. _DTD: http://docutils.sourceforge.net/docs/ref/docutils.dtd
20 """
22 __docformat__ = 'reStructuredText'
24 import sys
25 import os
26 import re
27 import warnings
28 import types
29 import unicodedata
31 # ==============================
32 # Functional Node Base Classes
33 # ==============================
class Node(object):

    """Abstract base class of nodes in a document tree."""

    parent = None
    """Back-reference to the Node immediately containing this Node."""

    document = None
    """The `document` node at the root of the tree containing this Node."""

    source = None
    """Path or description of the input source which generated this Node."""

    line = None
    """The line number (1-based) of the beginning of this Node in `source`."""

    def __nonzero__(self):
        """
        Report every Node as true, whether or not it has children.

        A node is more than a simple container, so its truth value must
        not depend on the number of subnodes.  Use `len()` to check node
        length and `None` to represent a boolean false value.
        """
        return True

    if sys.version_info < (3,):
        # On 2.x, str(node) is a byte string in which Unicode characters
        # > 255 are escaped; 3.x no longer needs this.
        def __str__(self):
            return unicode(self).encode('raw_unicode_escape')

    def asdom(self, dom=None):
        """
        Return a DOM **fragment** representation of this Node.

        `dom` is the DOM implementation module to use; it defaults to
        `xml.dom.minidom`.
        """
        if dom is None:
            import xml.dom.minidom as dom
        return self._dom_node(dom.Document())

    # NOTE(review): the default `indent` is reproduced exactly as it
    # appears in the reviewed text; runs of whitespace may have been
    # collapsed there -- confirm against the canonical file.
    def pformat(self, indent=' ', level=0):
        """
        Return an indented pseudo-XML representation, for test purposes.

        Must be overridden in subclasses.
        """
        raise NotImplementedError

    def copy(self):
        """Return a copy of self.  Must be overridden in subclasses."""
        raise NotImplementedError

    def deepcopy(self):
        """
        Return a deep copy of self (also copying children).

        Must be overridden in subclasses.
        """
        raise NotImplementedError

    def setup_child(self, child):
        """
        Make `child` point back to this node.

        If this node belongs to a document, the child inherits the
        document and, where it has none yet, the document's current
        source and line.
        """
        child.parent = self
        doc = self.document
        if not doc:
            return
        child.document = doc
        if child.source is None:
            child.source = doc.current_source
        if child.line is None:
            child.line = doc.current_line

    def walk(self, visitor):
        """
        Traverse a tree of `Node` objects, calling the
        `dispatch_visit()` method of `visitor` when entering each
        node.  (The `walkabout()` method is similar, except it also
        calls the `dispatch_departure()` method before exiting each
        node.)

        This tree traversal supports limited in-place tree
        modifications.  Replacing one node with one or more nodes is
        OK, as is removing an element.  However, if the node removed
        or replaced occurs after the current node, the old node will
        still be traversed, and any new nodes will not.

        Within ``visit`` methods (and ``depart`` methods for
        `walkabout()`), `TreePruningException` subclasses may be raised
        (`SkipChildren`, `SkipSiblings`, `SkipNode`, `SkipDeparture`).

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` implementation for each `Node` subclass encountered.

        Return true if we should stop the traversal.
        """
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walk calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except (SkipChildren, SkipNode):
                return False
            except SkipDeparture:
                # There is no departure call in walk(); nothing to skip.
                pass
            try:
                # Iterate over a snapshot so that visitors may modify
                # the live child list.
                for subnode in self.children[:]:
                    if subnode.walk(visitor):
                        return True
            except SkipSiblings:
                pass
        except StopTraversal:
            return True
        return False

    def walkabout(self, visitor):
        """
        Perform a tree traversal similarly to `Node.walk()` (which
        see), except also call the `dispatch_departure()` method
        before exiting each node.

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` and ``depart`` implementation for each `Node`
        subclass encountered.

        Return true if we should stop the traversal.
        """
        depart_wanted = True
        halted = False
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walkabout calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except SkipNode:
                # Skip children *and* the departure call.
                return halted
            except SkipDeparture:
                depart_wanted = False
            snapshot = self.children[:]
            try:
                for subnode in snapshot:
                    if subnode.walkabout(visitor):
                        halted = True
                        break
            except SkipSiblings:
                pass
        except SkipChildren:
            pass
        except StopTraversal:
            halted = True
        if depart_wanted:
            visitor.document.reporter.debug(
                'docutils.nodes.Node.walkabout calling dispatch_departure '
                'for %s' % self.__class__.__name__)
            visitor.dispatch_departure(self)
        return halted

    def _fast_traverse(self, cls):
        """Specialized traverse() that only supports instance checks."""
        if isinstance(self, cls):
            found = [self]
        else:
            found = []
        for subnode in self.children:
            found.extend(subnode._fast_traverse(cls))
        return found

    def _all_traverse(self):
        """Specialized traverse() that doesn't check for a condition."""
        collected = [self]
        for subnode in self.children:
            collected.extend(subnode._all_traverse())
        return collected

    def traverse(self, condition=None, include_self=True, descend=True,
                 siblings=False, ascend=False):
        """
        Return an iterable containing

        * self (if include_self is true)
        * all descendants in tree traversal order (if descend is true)
        * all siblings (if siblings is true) and their descendants (if
          also descend is true)
        * the siblings of the parent (if ascend is true) and their
          descendants (if also descend is true), and so on

        If `condition` is not None, the iterable contains only nodes
        for which ``condition(node)`` is true.  If `condition` is a
        node class ``cls``, it is equivalent to a function consisting
        of ``return isinstance(node, cls)``.

        If ascend is true, assume siblings to be true as well.

        For example, given the following tree::

            <paragraph>
                <emphasis>      <--- emphasis.traverse() and
                    <strong>    <--- strong.traverse() are called.
                        Foo
                    Bar
                <reference name="Baz" refid="baz">
                    Baz

        Then list(emphasis.traverse()) equals ::

            [<emphasis>, <strong>, <#text: Foo>, <#text: Bar>]

        and list(strong.traverse(ascend=True)) equals ::

            [<strong>, <#text: Foo>, <#text: Bar>, <reference>, <#text: Baz>]
        """
        if ascend:
            siblings = True
        # Some argument combinations allow an optimized version of
        # traverse().
        if include_self and descend and not siblings:
            if condition is None:
                return self._all_traverse()
            elif isinstance(condition, (types.ClassType, type)):
                return self._fast_traverse(condition)
        # A class given as `condition` is turned into an instance check
        # (`type` is tested as well for Python implementations that use
        # only new-style classes, like PyPy).
        if isinstance(condition, (types.ClassType, type)):
            wanted_class = condition
            def condition(node, wanted_class=wanted_class):
                return isinstance(node, wanted_class)
        matches = []
        if include_self and (condition is None or condition(self)):
            matches.append(self)
        if descend and len(self.children):
            for subnode in self:
                matches.extend(
                    subnode.traverse(condition=condition,
                                     include_self=True, descend=True,
                                     siblings=False, ascend=False))
        if siblings or ascend:
            current = self
            while current.parent:
                position = current.parent.index(current)
                for follower in current.parent[position+1:]:
                    matches.extend(
                        follower.traverse(condition=condition,
                                          include_self=True,
                                          descend=descend,
                                          siblings=False, ascend=False))
                if not ascend:
                    break
                current = current.parent
        return matches

    def next_node(self, condition=None, include_self=False, descend=True,
                  siblings=False, ascend=False):
        """
        Return the first node in the iterable returned by traverse(),
        or None if the iterable is empty.

        Parameter list is the same as of traverse.  Note that
        include_self defaults to False, though.
        """
        candidates = self.traverse(condition=condition,
                                   include_self=include_self,
                                   descend=descend,
                                   siblings=siblings, ascend=ascend)
        try:
            first = candidates[0]
        except IndexError:
            return None
        return first
if sys.version_info >= (3,):
    # repr() of a 3.x string carries no prefix; no wrapper is needed.
    reprunicode = unicode
else:
    class reprunicode(unicode):
        """
        A class that removes the initial u from unicode's repr.
        """

        def __repr__(self):
            prefixed = unicode.__repr__(self)
            return prefixed[1:]
class Text(Node, reprunicode):

    """
    Instances are terminal nodes (leaves) containing text only; no child
    nodes or attributes.  Initialize by passing a string to the constructor.
    Access the text itself with the `astext` method.
    """

    tagname = '#text'

    children = ()
    """Text nodes have no children, and cannot have children."""

    def __new__(cls, data, rawsource=None):
        """Prevent the rawsource argument from propagating to str."""
        # `bytes` is only consulted on 3.x; the version test
        # short-circuits on older interpreters.
        if sys.version_info > (3,) and isinstance(data, bytes):
            raise TypeError('expecting str data, not bytes')
        return reprunicode.__new__(cls, data)

    def __init__(self, data, rawsource=''):
        self.rawsource = rawsource
        """The raw text from which this element was constructed."""

    def shortrepr(self, maxlen=18):
        """Return ``<#text: ...>``, abbreviating text beyond `maxlen`."""
        text = self
        if len(text) > maxlen:
            text = text[:maxlen-4] + ' ...'
        return '<%s: %s>' % (self.tagname, repr(reprunicode(text)))

    def __repr__(self):
        return self.shortrepr(maxlen=68)

    def _dom_node(self, domroot):
        """Return a DOM text node created by `domroot`."""
        return domroot.createTextNode(unicode(self))

    def astext(self):
        """Return the text as a plain `reprunicode` string."""
        return reprunicode(self)

    # Note about __unicode__: The implementation of __unicode__ here,
    # and the one raising NotImplemented in the superclass Node had
    # to be removed when changing Text to a subclass of unicode instead
    # of UserString, since there is no way to delegate the __unicode__
    # call to the superclass unicode:
    # unicode itself does not have __unicode__ method to delegate to
    # and calling unicode(self) or unicode.__new__ directly creates
    # an infinite loop

    def copy(self):
        """Return a new node of the same class, text, and rawsource."""
        return self.__class__(reprunicode(self), rawsource=self.rawsource)

    def deepcopy(self):
        """Same as `copy()`: a Text node has no children to copy."""
        return self.copy()

    # NOTE(review): the default `indent` is reproduced exactly as it
    # appears in the reviewed text; runs of whitespace may have been
    # collapsed there -- confirm against the canonical file.
    def pformat(self, indent=' ', level=0):
        """Return the text, one indented and newline-ended line per line."""
        prefix = indent * level
        return ''.join([prefix + line + '\n'
                        for line in self.splitlines()])

    # rstrip and lstrip are used by substitution definitions where
    # they are expected to return a Text instance, this was formerly
    # taken care of by UserString. Note that then and now the
    # rawsource member is lost.

    def rstrip(self, chars=None):
        stripped = reprunicode.rstrip(self, chars)
        return self.__class__(stripped)

    def lstrip(self, chars=None):
        stripped = reprunicode.lstrip(self, chars)
        return self.__class__(stripped)
class Element(Node):

    """
    `Element` is the superclass to all specific elements.

    Elements contain attributes and child nodes.  Elements emulate
    dictionaries for attributes, indexing by attribute name (a string).  To
    set the attribute 'att' to 'value', do::

        element['att'] = 'value'

    There are two special attributes: 'ids' and 'names'.  Both are
    lists of unique identifiers, and names serve as human interfaces
    to IDs.  Names are case- and whitespace-normalized (see the
    fully_normalize_name() function), and IDs conform to the regular
    expression ``[a-z](-?[a-z0-9]+)*`` (see the make_id() function).

    Elements also emulate lists for child nodes (element nodes and/or text
    nodes), indexing by integer.  To get the first child node, use::

        element[0]

    Elements may be constructed using the ``+=`` operator.  To add one new
    child node to element, do::

        element += node

    This is equivalent to ``element.append(node)``.

    To add a list of multiple child nodes at once, use the same ``+=``
    operator::

        element += [node1, node2]

    This is equivalent to ``element.extend([node1, node2])``.
    """

    list_attributes = ('ids', 'classes', 'names', 'dupnames', 'backrefs')
    """List attributes, automatically initialized to empty lists for
    all nodes."""

    tagname = None
    """The element generic identifier. If None, it is set as an instance
    attribute to the name of the class."""

    child_text_separator = '\n\n'
    """Separator for child nodes, used by `astext()` method."""

    def __init__(self, rawsource='', *children, **attributes):
        self.rawsource = rawsource
        """The raw text from which this element was constructed."""

        self.children = []
        """List of child nodes (elements and/or `Text`)."""

        self.extend(children)           # maintain parent info

        self.attributes = {}
        """Dictionary of attribute {name: value}."""

        # Initialize list attributes.
        for att in self.list_attributes:
            self.attributes[att] = []

        for att, value in attributes.items():
            att = att.lower()
            if att in self.list_attributes:
                # mutable list; make a copy for this node
                self.attributes[att] = value[:]
            else:
                self.attributes[att] = value

        if self.tagname is None:
            self.tagname = self.__class__.__name__

    def _dom_node(self, domroot):
        """Return a DOM element (with attributes and children) for self."""
        element = domroot.createElement(self.tagname)
        for attribute, value in self.attlist():
            if isinstance(value, list):
                value = ' '.join([serial_escape('%s' % (v,)) for v in value])
            element.setAttribute(attribute, '%s' % value)
        for child in self.children:
            element.appendChild(child._dom_node(domroot))
        return element

    def __repr__(self):
        data = ''
        for c in self.children:
            data += c.shortrepr()
            if len(data) > 60:
                data = data[:56] + ' ...'
                break
        if self['names']:
            return '<%s "%s": %s>' % (self.__class__.__name__,
                                      '; '.join(self['names']), data)
        else:
            return '<%s: %s>' % (self.__class__.__name__, data)

    def shortrepr(self):
        if self['names']:
            return '<%s "%s"...>' % (self.__class__.__name__,
                                     '; '.join(self['names']))
        else:
            return '<%s...>' % self.tagname

    def __unicode__(self):
        if self.children:
            return u'%s%s%s' % (self.starttag(),
                                ''.join([unicode(c) for c in self.children]),
                                self.endtag())
        else:
            return self.emptytag()

    if sys.version_info > (3,):
        # 2to3 doesn't convert __unicode__ to __str__
        __str__ = __unicode__

    def starttag(self, quoteattr=None):
        """
        Return the pseudo-XML start tag, including non-default attributes.

        `quoteattr` is the function used to quote attribute values; it
        defaults to `pseudo_quoteattr`.
        """
        # the optional arg is used by the docutils_xml writer
        if quoteattr is None:
            quoteattr = pseudo_quoteattr
        parts = [self.tagname]
        for name, value in self.attlist():
            if value is None:           # boolean attribute
                parts.append(name)
                continue
            if isinstance(value, list):
                values = [serial_escape('%s' % (v,)) for v in value]
                value = ' '.join(values)
            else:
                value = unicode(value)
            value = quoteattr(value)
            parts.append(u'%s=%s' % (name, value))
        return u'<%s>' % u' '.join(parts)

    def endtag(self):
        return '</%s>' % self.tagname

    def emptytag(self):
        return u'<%s/>' % u' '.join([self.tagname] +
                                    ['%s="%s"' % (n, v)
                                     for n, v in self.attlist()])

    def __len__(self):
        return len(self.children)

    def __contains__(self, key):
        """
        Support the ``in`` operator for attributes and for children.

        A string `key` is looked up among the attribute names; anything
        else is looked up among the child nodes.
        """
        # (has_key is translated to "in" by 2to3)
        if isinstance(key, basestring):
            return key in self.attributes
        return key in self.children

    def __getitem__(self, key):
        if isinstance(key, basestring):
            return self.attributes[key]
        elif isinstance(key, int):
            return self.children[key]
        elif isinstance(key, types.SliceType):
            assert key.step in (None, 1), 'cannot handle slice with stride'
            return self.children[key.start:key.stop]
        else:
            raise TypeError('element index must be an integer, a slice, or '
                            'an attribute name string')

    def __setitem__(self, key, item):
        if isinstance(key, basestring):
            self.attributes[str(key)] = item
        elif isinstance(key, int):
            self.setup_child(item)
            self.children[key] = item
        elif isinstance(key, types.SliceType):
            assert key.step in (None, 1), 'cannot handle slice with stride'
            for node in item:
                self.setup_child(node)
            self.children[key.start:key.stop] = item
        else:
            raise TypeError('element index must be an integer, a slice, or '
                            'an attribute name string')

    def __delitem__(self, key):
        if isinstance(key, basestring):
            del self.attributes[key]
        elif isinstance(key, int):
            del self.children[key]
        elif isinstance(key, types.SliceType):
            assert key.step in (None, 1), 'cannot handle slice with stride'
            del self.children[key.start:key.stop]
        else:
            raise TypeError('element index must be an integer, a simple '
                            'slice, or an attribute name string')

    def __add__(self, other):
        return self.children + other

    def __radd__(self, other):
        return other + self.children

    def __iadd__(self, other):
        """Append a node or a list of nodes to `self.children`."""
        if isinstance(other, Node):
            self.append(other)
        elif other is not None:
            self.extend(other)
        return self

    def astext(self):
        """Return the children's text joined by `child_text_separator`."""
        return self.child_text_separator.join(
              [child.astext() for child in self.children])

    def non_default_attributes(self):
        """Return a dict of the attributes for which `is_not_default()`."""
        atts = {}
        for key, value in self.attributes.items():
            if self.is_not_default(key):
                atts[key] = value
        return atts

    def attlist(self):
        """Return the non-default attributes as a sorted list of pairs."""
        # Build a real list first: the result of items() cannot be
        # sorted in place on every Python version.
        attlist = list(self.non_default_attributes().items())
        attlist.sort()
        return attlist

    def get(self, key, failobj=None):
        return self.attributes.get(key, failobj)

    def hasattr(self, attr):
        return attr in self.attributes

    def delattr(self, attr):
        if attr in self.attributes:
            del self.attributes[attr]

    def setdefault(self, key, failobj=None):
        return self.attributes.setdefault(key, failobj)

    has_key = hasattr

    # The ``in`` operator is implemented by `__contains__()` above,
    # which covers both attribute names and child nodes; it must not be
    # rebound to `hasattr` here, or child membership tests would break.

    def append(self, item):
        self.setup_child(item)
        self.children.append(item)

    def extend(self, item):
        for node in item:
            self.append(node)

    def insert(self, index, item):
        """Insert a node, or each node of a list of nodes, at `index`."""
        if isinstance(item, Node):
            self.setup_child(item)
            self.children.insert(index, item)
        elif item is not None:
            self[index:index] = item

    def pop(self, i=-1):
        return self.children.pop(i)

    def remove(self, item):
        self.children.remove(item)

    def index(self, item):
        return self.children.index(item)

    def is_not_default(self, key):
        """Return False for an empty list attribute, True otherwise."""
        if self[key] == [] and key in self.list_attributes:
            return False
        else:
            return True

    def update_basic_atts(self, dict):
        """
        Update basic attributes ('ids', 'names', 'classes',
        'dupnames', but not 'source') from node or dictionary `dict`.
        """
        if isinstance(dict, Node):
            dict = dict.attributes
        for att in ('ids', 'classes', 'names', 'dupnames'):
            for value in dict.get(att, []):
                if value not in self[att]:
                    self[att].append(value)

    def clear(self):
        self.children = []

    def replace(self, old, new):
        """Replace one child `Node` with another child or children."""
        index = self.index(old)
        if isinstance(new, Node):
            self.setup_child(new)
            self[index] = new
        elif new is not None:
            self[index:index+1] = new

    def replace_self(self, new):
        """
        Replace `self` node with `new`, where `new` is a node or a
        list of nodes.
        """
        update = new
        if not isinstance(new, Node):
            # `new` is a list; update first child.
            try:
                update = new[0]
            except IndexError:
                update = None
        if isinstance(update, Element):
            update.update_basic_atts(self)
        else:
            # `update` is a Text node or `new` is an empty list.
            # Assert that we aren't losing any attributes.
            for att in ('ids', 'names', 'classes', 'dupnames'):
                assert not self[att], \
                       'Losing "%s" attribute: %s' % (att, self[att])
        self.parent.replace(self, new)

    def first_child_matching_class(self, childclass, start=0, end=sys.maxint):
        """
        Return the index of the first child that is an instance of
        `childclass` (subclasses included), or None if there is none.

        Parameters:

        - `childclass`: A `Node` subclass to search for, or a tuple of `Node`
          classes. If a tuple, any of the classes may match.
        - `start`: Initial index to check.
        - `end`: Initial index to *not* check.
        """
        if not isinstance(childclass, tuple):
            childclass = (childclass,)
        for index in range(start, min(len(self), end)):
            # isinstance() accepts a tuple of classes directly.
            if isinstance(self[index], childclass):
                return index
        return None

    def first_child_not_matching_class(self, childclass, start=0,
                                       end=sys.maxint):
        """
        Return the index of the first child that is *not* an instance
        of `childclass`, or None if every child matches.

        Parameters:

        - `childclass`: A `Node` subclass to skip, or a tuple of `Node`
          classes. If a tuple, none of the classes may match.
        - `start`: Initial index to check.
        - `end`: Initial index to *not* check.
        """
        if not isinstance(childclass, tuple):
            childclass = (childclass,)
        for index in range(start, min(len(self), end)):
            if not isinstance(self.children[index], childclass):
                return index
        return None

    # NOTE(review): the default `indent` is reproduced exactly as it
    # appears in the reviewed text; runs of whitespace may have been
    # collapsed there -- confirm against the canonical file.
    def pformat(self, indent=' ', level=0):
        """Return an indented pseudo-XML representation of the subtree."""
        return ''.join(['%s%s\n' % (indent * level, self.starttag())] +
                       [child.pformat(indent, level+1)
                        for child in self.children])

    def copy(self):
        """Return a childless copy with the same rawsource and attributes."""
        return self.__class__(rawsource=self.rawsource, **self.attributes)

    def deepcopy(self):
        """Return a copy of self together with deep copies of all children."""
        copy = self.copy()
        copy.extend([child.deepcopy() for child in self.children])
        return copy

    def set_class(self, name):
        """Add a new class to the "classes" attribute."""
        warnings.warn('docutils.nodes.Element.set_class deprecated; '
                      "append to Element['classes'] list attribute directly",
                      DeprecationWarning, stacklevel=2)
        assert ' ' not in name
        self['classes'].append(name.lower())

    def note_referenced_by(self, name=None, id=None):
        """Note that this Element has been referenced by its name
        `name` or id `id`."""
        self.referenced = True
        # Element.expect_referenced_by_* dictionaries map names or ids
        # to nodes whose ``referenced`` attribute is set to true as
        # soon as this node is referenced by the given name or id.
        # Needed for target propagation.
        by_name = getattr(self, 'expect_referenced_by_name', {}).get(name)
        by_id = getattr(self, 'expect_referenced_by_id', {}).get(id)
        if by_name:
            assert name is not None
            by_name.referenced = True
        if by_id:
            assert id is not None
            by_id.referenced = True
class TextElement(Element):

    """
    An element which directly contains text.

    Its children are all `Text` or `Inline` subclass nodes.  You can
    check whether an element's context is inline simply by checking whether
    its immediate parent is a `TextElement` instance (including subclasses).
    This is handy for nodes like `image` that can appear both inline and as
    standalone body elements.

    If passing children to `__init__()`, make sure to set `text` to
    ``''`` or some other suitable value.
    """

    child_text_separator = ''
    """Separator for child nodes, used by `astext()` method."""

    def __init__(self, rawsource='', text='', *children, **attributes):
        # A non-empty `text` becomes the first child, wrapped in `Text`.
        if text != '':
            children = (Text(text),) + children
        Element.__init__(self, rawsource, *children, **attributes)
class FixedTextElement(TextElement):

    """An element which directly contains preformatted text."""

    def __init__(self, rawsource='', text='', *children, **attributes):
        TextElement.__init__(self, rawsource, text, *children, **attributes)
        # Always set after the regular initialization, so the value
        # wins over anything passed in `attributes`.
        self.attributes.update({'xml:space': 'preserve'})
818 # ========
819 # Mixins
820 # ========
class Resolvable:

    """Mixin providing a `resolved` flag that defaults to false."""

    # A real boolean instead of 0; compares equal to the old value.
    resolved = False
class BackLinkable:

    """Mixin for elements that record back-references."""

    def add_backref(self, refid):
        """Append `refid` to this element's 'backrefs' list attribute."""
        backrefs = self['backrefs']
        backrefs.append(refid)
833 # ====================
834 # Element Categories
835 # ====================
# The classes below carry no behaviour of their own.  As described in
# the module docstring, they are abstract base classes that represent
# the position of a node in the hierarchy, to be tested with
# ``isinstance(node, base_class)``.

class Root: pass

class Titular: pass

class PreBibliographic:
    """Category of Node which may occur before Bibliographic Nodes."""

class Bibliographic: pass

class Decorative(PreBibliographic): pass

class Structural: pass

class Body: pass

class General(Body): pass

class Sequential(Body):
    """List-like elements."""

class Admonition(Body): pass

class Special(Body):
    """Special internal body elements."""

class Invisible(PreBibliographic):
    """Internal elements that don't appear in output."""

class Part: pass

class Inline: pass

# Referential nodes inherit the `resolved` flag of `Resolvable`.
class Referential(Resolvable): pass
class Targetable(Resolvable):

    """Mixin for nodes that can be the target of a reference."""

    # A real boolean instead of 0; compares equal to the old value.
    referenced = False

    indirect_reference_name = None
    """Holds the whitespace_normalized_name (contains mixed case) of a target.
    Required for MoinMoin/reST compatibility."""
# Marker class; defines no attributes or methods of its own.
class Labeled:
    """Contains a `label` as its first element."""
885 # ==============
886 # Root Element
887 # ==============
889 class document(Root, Structural, Element):
892 The document root element.
894 Do not instantiate this class directly; use
895 `docutils.utils.new_document()` instead.
def __init__(self, settings, reporter, *args, **kwargs):
    import docutils.transforms

    Element.__init__(self, *args, **kwargs)

    # --- input position -------------------------------------------

    self.current_source = None
    """Path to or description of the input source being processed."""

    self.current_line = None
    """Line number (1-based) of `current_source`."""

    # --- runtime services -----------------------------------------

    self.settings = settings
    """Runtime settings data record."""

    self.reporter = reporter
    """System message generator."""

    # --- targets, names, and ids ----------------------------------

    self.indirect_targets = []
    """List of indirect target nodes."""

    self.substitution_defs = {}
    """Mapping of substitution names to substitution_definition nodes."""

    self.substitution_names = {}
    """Mapping of case-normalized substitution names to case-sensitive
    names."""

    self.refnames = {}
    """Mapping of names to lists of referencing nodes."""

    self.refids = {}
    """Mapping of ids to lists of referencing nodes."""

    self.nameids = {}
    """Mapping of names to unique id's."""

    self.nametypes = {}
    """Mapping of names to hyperlink type (boolean: True => explicit,
    False => implicit)."""

    self.ids = {}
    """Mapping of ids to nodes."""

    self.id_start = 1
    """Initial ID number."""

    # --- footnotes and citations ----------------------------------

    self.footnote_refs = {}
    """Mapping of footnote labels to lists of footnote_reference nodes."""

    self.citation_refs = {}
    """Mapping of citation labels to lists of citation_reference nodes."""

    self.autofootnotes = []
    """List of auto-numbered footnote nodes."""

    self.autofootnote_refs = []
    """List of auto-numbered footnote_reference nodes."""

    self.autofootnote_start = 1
    """Initial auto-numbered footnote number."""

    self.symbol_footnotes = []
    """List of symbol footnote nodes."""

    self.symbol_footnote_refs = []
    """List of symbol footnote_reference nodes."""

    self.symbol_footnote_start = 0
    """Initial symbol footnote symbol index."""

    self.footnotes = []
    """List of manually-numbered footnote nodes."""

    self.citations = []
    """List of citation nodes."""

    # --- system messages ------------------------------------------

    self.parse_messages = []
    """System messages generated while parsing."""

    self.transform_messages = []
    """System messages generated while applying transforms."""

    # --- created last, once all plain attributes exist ------------

    self.transformer = docutils.transforms.Transformer(self)
    """Storage for transforms to be applied to this document."""

    self.decoration = None
    """Document's `decoration` node."""

    self.document = self
def __getstate__(self):
    """
    Return dict with unpicklable references removed.

    The result is a copy of the instance dictionary in which
    'reporter' and 'transformer' are set to None.
    """
    picklable = dict(self.__dict__)
    for unpicklable in ('reporter', 'transformer'):
        picklable[unpicklable] = None
    return picklable
def asdom(self, dom=None):
    """
    Return a DOM representation of this document.

    `dom` is the DOM implementation module to use; it defaults to
    `xml.dom.minidom`.
    """
    if dom is None:
        import xml.dom.minidom as dom
    dom_document = dom.Document()
    root_element = self._dom_node(dom_document)
    dom_document.appendChild(root_element)
    return dom_document
def set_id(self, node, msgnode=None):
    """
    Make sure `node` has an ID, register it in `self.ids`, and return it.

    Every ID already present on `node` is checked: if it is registered
    for a different node, a "Duplicate ID" severe system message is
    generated and, if `msgnode` is given, added to it.

    A node without IDs gets one derived from the first of its names
    that yields a non-empty, unused ID, or else an automatically
    numbered ID built from the ``id_prefix`` and ``auto_id_prefix``
    settings and `self.id_start`.

    The ID registered and returned is the new one or, for a node that
    already had IDs, the last of them.
    """
    for id in node['ids']:
        if id in self.ids and self.ids[id] is not node:
            msg = self.reporter.severe('Duplicate ID: "%s".' % id)
            # Identity test: the decision must not depend on how
            # `msgnode` implements comparison.
            if msgnode is not None:
                msgnode += msg
    if not node['ids']:
        for name in node['names']:
            id = self.settings.id_prefix + make_id(name)
            if id and id not in self.ids:
                break
        else:
            # No name produced a usable ID; generate a numbered one.
            id = ''
            while not id or id in self.ids:
                id = (self.settings.id_prefix +
                      self.settings.auto_id_prefix + str(self.id_start))
                self.id_start += 1
        node['ids'].append(id)
    self.ids[id] = node
    return id
def set_name_id_map(self, node, id, msgnode=None, explicit=None):
    """
    `self.nameids` maps names to IDs, while `self.nametypes` maps names to
    booleans representing hyperlink type (True==explicit,
    False==implicit).  This method updates the mappings.

    The following state transition table shows how `self.nameids` ("ids")
    and `self.nametypes` ("types") change with new input (a call to this
    method), and what actions are performed ("implicit"-type system
    messages are INFO/1, and "explicit"-type system messages are ERROR/3):

    ====  =====  ========  ========  =======  ====  =====  =====
     Old State    Input          Action        New State   Notes
    -----------  --------  -----------------  -----------  -----
    ids   types  new type  sys.msg.  dupname  ids   types
    ====  =====  ========  ========  =======  ====  =====  =====
    -     -      explicit  -         -        new   True
    -     -      implicit  -         -        new   False
    None  False  explicit  -         -        new   True
    old   False  explicit  implicit  old      new   True
    None  True   explicit  explicit  new      None  True
    old   True   explicit  explicit  new,old  None  True   [#]_
    None  False  implicit  implicit  new      None  False
    old   False  implicit  implicit  new,old  None  False
    None  True   implicit  implicit  new      None  True
    old   True   implicit  implicit  new      old   True
    ====  =====  ========  ========  =======  ====  =====  =====

    .. [#] Do not clear the name-to-id map or invalidate the old target if
       both old and new targets are external and refer to identical URIs.
       The new target is invalidated regardless.
    """
    for name in node['names']:
        if name not in self.nameids:
            # First occurrence of the name: record ID and type.
            self.nameids[name] = id
            self.nametypes[name] = explicit
            continue
        self.set_duplicate_name_id(node, id, name, msgnode, explicit)
def set_duplicate_name_id(self, node, id, name, msgnode, explicit):
    """
    Handle a `name` that is already present in `self.nameids`.

    Parameters:

    - `node`: the new target carrying `name`.
    - `id`: the ID of `node`.
    - `name`: the duplicated name.
    - `msgnode`: node to which generated system messages are added;
      may be None.
    - `explicit`: true if `node` is an explicit target.

    Updates `self.nameids` and `self.nametypes`, calls `dupname()` on
    the invalidated target(s), and generates system messages through
    `self.reporter`.
    """
    old_id = self.nameids[name]
    old_explicit = self.nametypes[name]
    # The name counts as explicit once any of its targets was explicit.
    self.nametypes[name] = old_explicit or explicit
    if explicit:
        if old_explicit:
            level = 2
            if old_id is not None:
                old_node = self.ids[old_id]
                if 'refuri' in node:
                    refuri = node['refuri']
                    if old_node['names'] \
                           and 'refuri' in old_node \
                           and old_node['refuri'] == refuri:
                        level = 1   # just inform if refuri's identical
                if level > 1:
                    dupname(old_node, name)
                    self.nameids[name] = None
            msg = self.reporter.system_message(
                level, 'Duplicate explicit target name: "%s".' % name,
                backrefs=[id], base_node=node)
            # Identity test: the decision must not depend on how
            # `msgnode` implements comparison.
            if msgnode is not None:
                msgnode += msg
            dupname(node, name)
        else:
            # The new explicit target takes the name over from the
            # old implicit one.
            self.nameids[name] = id
            if old_id is not None:
                old_node = self.ids[old_id]
                dupname(old_node, name)
    else:
        if old_id is not None and not old_explicit:
            self.nameids[name] = None
            old_node = self.ids[old_id]
            dupname(old_node, name)
        dupname(node, name)
    if not explicit or (not old_explicit and old_id is not None):
        msg = self.reporter.info(
            'Duplicate implicit target name: "%s".' % name,
            backrefs=[id], base_node=node)
        if msgnode is not None:
            msgnode += msg
def has_name(self, name):
    """Return whether `name` is a key of `self.nameids`."""
    registered = self.nameids
    return name in registered
1109 # "note" here is an imperative verb: "take note of".
def note_implicit_target(self, target, msgnode=None):
    """
    Register `target` as an implicit target.

    Obtains an ID from `self.set_id()` and passes it on to
    `self.set_name_id_map()` with ``explicit=None``.
    """
    target_id = self.set_id(target, msgnode)
    self.set_name_id_map(target, target_id, msgnode, explicit=None)
def note_explicit_target(self, target, msgnode=None):
    """
    Register `target` as an explicit target.

    Obtains an ID from `self.set_id()` and passes it on to
    `self.set_name_id_map()` with ``explicit=True``.
    """
    target_id = self.set_id(target, msgnode)
    self.set_name_id_map(target, target_id, msgnode, explicit=True)
def note_refname(self, node):
    """Append `node` to the `self.refnames` list keyed by its "refname"."""
    refname = node['refname']
    waiting = self.refnames.setdefault(refname, [])
    waiting.append(node)
def note_refid(self, node):
    """Append `node` to the `self.refids` list keyed by its "refid"."""
    refid = node['refid']
    waiting = self.refids.setdefault(refid, [])
    waiting.append(node)
def note_indirect_target(self, target):
    """
    Append `target` to `self.indirect_targets`.

    A target with a non-empty "names" value is additionally passed to
    `self.note_refname()`.
    """
    self.indirect_targets.append(target)
    if not target['names']:
        return
    self.note_refname(target)
def note_anonymous_target(self, target):
    """Pass `target` to `self.set_id()`; the result is discarded."""
    assign_id = self.set_id
    assign_id(target)
def note_autofootnote(self, footnote):
    """Call `self.set_id(footnote)`, then append it to `self.autofootnotes`."""
    self.set_id(footnote)
    collected = self.autofootnotes
    collected.append(footnote)
def note_autofootnote_ref(self, ref):
    """Call `self.set_id(ref)`, then append it to `self.autofootnote_refs`."""
    self.set_id(ref)
    collected = self.autofootnote_refs
    collected.append(ref)
def note_symbol_footnote(self, footnote):
    """Call `self.set_id(footnote)`, then append it to `self.symbol_footnotes`."""
    self.set_id(footnote)
    collected = self.symbol_footnotes
    collected.append(footnote)
def note_symbol_footnote_ref(self, ref):
    """Call `self.set_id(ref)`, then append it to `self.symbol_footnote_refs`."""
    self.set_id(ref)
    collected = self.symbol_footnote_refs
    collected.append(ref)
def note_footnote(self, footnote):
    """Call `self.set_id(footnote)`, then append it to `self.footnotes`."""
    self.set_id(footnote)
    collected = self.footnotes
    collected.append(footnote)
def note_footnote_ref(self, ref):
    """
    Record the footnote reference `ref`.

    Calls `self.set_id(ref)`, appends `ref` to the `self.footnote_refs`
    list keyed by its "refname", then calls `self.note_refname(ref)`.
    """
    self.set_id(ref)
    refname = ref['refname']
    same_name = self.footnote_refs.setdefault(refname, [])
    same_name.append(ref)
    self.note_refname(ref)
def note_citation(self, citation):
    """Append `citation` to `self.citations`."""
    collected = self.citations
    collected.append(citation)
def note_citation_ref(self, ref):
    """
    Record the citation reference `ref`.

    Calls `self.set_id(ref)`, appends `ref` to the `self.citation_refs`
    list keyed by its "refname", then calls `self.note_refname(ref)`.
    """
    self.set_id(ref)
    refname = ref['refname']
    same_name = self.citation_refs.setdefault(refname, [])
    same_name.append(ref)
    self.note_refname(ref)
def note_substitution_def(self, subdef, def_name, msgnode=None):
    """
    Register the substitution definition `subdef` under `def_name`.

    The name is whitespace-normalized before use.  If it is already
    defined, an error is reported through `self.reporter` (and added to
    `msgnode` when that is not None) and the name is moved to "dupnames"
    on the earlier definition; the new definition then replaces it.
    `self.substitution_names` maps the fully normalized name to the
    whitespace-normalized one.
    """
    name = whitespace_normalize_name(def_name)
    if name in self.substitution_defs:
        msg = self.reporter.error(
            'Duplicate substitution definition name: "%s".' % name,
            base_node=subdef)
        # Identity test: never dispatch to a node's comparison methods.
        if msgnode is not None:
            msgnode += msg
        oldnode = self.substitution_defs[name]
        dupname(oldnode, name)
    # keep only the last definition:
    self.substitution_defs[name] = subdef
    # case-insensitive mapping:
    self.substitution_names[fully_normalize_name(name)] = name
def note_substitution_ref(self, subref, refname):
    """Store the whitespace-normalized `refname` as ``subref['refname']``."""
    normalized = whitespace_normalize_name(refname)
    subref['refname'] = normalized
def note_pending(self, pending, priority=None):
    """Forward `pending` and `priority` to `self.transformer.add_pending()`."""
    transformer = self.transformer
    transformer.add_pending(pending, priority)
def note_parse_message(self, message):
    """Append `message` to `self.parse_messages`."""
    collected = self.parse_messages
    collected.append(message)
def note_transform_message(self, message):
    """Append `message` to `self.transform_messages`."""
    collected = self.transform_messages
    collected.append(message)
def note_source(self, source, offset):
    """
    Remember the current input position.

    `source` is stored as `self.current_source`.  `self.current_line` is
    set to ``offset + 1``, or to None when `offset` is None.
    """
    self.current_source = source
    line = offset
    if offset is not None:
        line = offset + 1
    self.current_line = line
def copy(self):
    """
    Return a new instance of the same class.

    The new instance is built from `self.settings`, `self.reporter` and
    the entries of `self.attributes` passed as keyword arguments.
    """
    cls = self.__class__
    return cls(self.settings, self.reporter, **self.attributes)
def get_decoration(self):
    """
    Return `self.decoration`, creating it on first use.

    A newly created `decoration` element is inserted at the index of the
    first child that is not a `Titular` instance, or appended if
    `first_child_not_matching_class()` returns None.
    """
    if self.decoration:
        return self.decoration
    self.decoration = decoration()
    position = self.first_child_not_matching_class(Titular)
    if position is not None:
        self.insert(position, self.decoration)
    else:
        self.append(self.decoration)
    return self.decoration
1214 # ================
1215 # Title Elements
1216 # ================
# Title elements: each combines the `Titular` category with `TextElement`.

class title(Titular, PreBibliographic, TextElement):
    pass

class subtitle(Titular, PreBibliographic, TextElement):
    pass

class rubric(Titular, TextElement):
    pass
1223 # ========================
1224 # Bibliographic Elements
1225 # ========================
# Bibliographic elements: all belong to the `Bibliographic` category.

class docinfo(Bibliographic, Element):
    pass

class author(Bibliographic, TextElement):
    pass

class authors(Bibliographic, Element):
    pass

class organization(Bibliographic, TextElement):
    pass

class address(Bibliographic, FixedTextElement):
    pass

class contact(Bibliographic, TextElement):
    pass

class version(Bibliographic, TextElement):
    pass

class revision(Bibliographic, TextElement):
    pass

class status(Bibliographic, TextElement):
    pass

class date(Bibliographic, TextElement):
    pass

class copyright(Bibliographic, TextElement):
    pass
1240 # =====================
1241 # Decorative Elements
1242 # =====================
class decoration(Decorative, Element):

    """Container giving access to a leading `header` and trailing `footer`."""

    def get_header(self):
        """Return the first child, inserting a new `header` if it is not one."""
        children = self.children
        if not (len(children) and isinstance(children[0], header)):
            self.insert(0, header())
        return self.children[0]

    def get_footer(self):
        """Return the last child, appending a new `footer` if it is not one."""
        children = self.children
        if not (len(children) and isinstance(children[-1], footer)):
            self.append(footer())
        return self.children[-1]
class header(Decorative, Element):
    pass

class footer(Decorative, Element):
    pass
1261 # =====================
1262 # Structural Elements
1263 # =====================
class section(Structural, Element):
    pass
class topic(Structural, Element):

    """
    A terminal ("leaf") mini-section, comparable to a block quote with a
    title or to a textual figure.

    A topic is like a section, except that it has no subsections and need
    not conform to section placement rules.

    Topics may occur wherever body elements (list, table, etc.) may occur,
    but only at the top level of a section or document.  They cannot nest
    inside topics, sidebars, or body elements: no topic inside a table,
    list, block quote, etc.
    """
class sidebar(Structural, Element):

    """
    A miniature, parallel document occurring inside another document and
    providing related or reference material.

    A sidebar is typically offset by a border and "floats" to the side of
    the page, with the document's main text possibly flowing around it.
    Sidebars can also be likened to super-footnotes: their content is
    outside of the flow of the document's main text.

    Sidebars may occur wherever body elements (list, table, etc.) may
    occur, but only at the top level of a section or document.  They cannot
    nest inside sidebars, topics, or body elements: no sidebar inside a
    table, list, block quote, etc.
    """
class transition(Structural, Element):
    pass
1302 # ===============
1303 # Body Elements
1304 # ===============
# Paragraph-level containers, lists, and their parts.

class paragraph(General, TextElement):
    pass

class compound(General, Element):
    pass

class container(General, Element):
    pass

class bullet_list(Sequential, Element):
    pass

class enumerated_list(Sequential, Element):
    pass

class list_item(Part, Element):
    pass

class definition_list(Sequential, Element):
    pass

class definition_list_item(Part, Element):
    pass

class term(Part, TextElement):
    pass

class classifier(Part, TextElement):
    pass

class definition(Part, Element):
    pass

class field_list(Sequential, Element):
    pass

class field(Part, Element):
    pass

class field_name(Part, TextElement):
    pass

class field_body(Part, Element):
    pass
class option(Part, Element):

    # Overrides the separator attribute with the empty string.
    child_text_separator = ''
class option_argument(Part, TextElement):

    def astext(self):
        """Return the element text prefixed by its "delimiter" (default ' ')."""
        delimiter = self.get('delimiter', ' ')
        text = TextElement.astext(self)
        return delimiter + text
class option_group(Part, Element):

    # Overrides the separator attribute with comma + space.
    child_text_separator = ', '
class option_list(Sequential, Element):
    pass
class option_list_item(Part, Element):

    # Overrides the separator attribute.
    # NOTE(review): whitespace inside this literal may have been collapsed
    # when the file was extracted -- confirm against the repository copy.
    child_text_separator = ' '
class option_string(Part, TextElement):
    pass

class description(Part, Element):
    pass

class literal_block(General, FixedTextElement):
    pass

class doctest_block(General, FixedTextElement):
    pass

class math_block(General, FixedTextElement):
    pass

class line_block(General, Element):
    pass
class line(Part, TextElement):

    # Class-level default; None until a value is assigned.
    indent = None
class block_quote(General, Element):
    pass

class attribution(Part, TextElement):
    pass

# Admonitions.

class attention(Admonition, Element):
    pass

class caution(Admonition, Element):
    pass

class danger(Admonition, Element):
    pass

class error(Admonition, Element):
    pass

class important(Admonition, Element):
    pass

class note(Admonition, Element):
    pass

class tip(Admonition, Element):
    pass

class hint(Admonition, Element):
    pass

class warning(Admonition, Element):
    pass

class admonition(Admonition, Element):
    pass

# Invisible elements.

class comment(Special, Invisible, FixedTextElement):
    pass

class substitution_definition(Special, Invisible, TextElement):
    pass

class target(Special, Invisible, Inline, TextElement, Targetable):
    pass

# Labeled, back-linkable elements and their label.

class footnote(General, BackLinkable, Element, Labeled, Targetable):
    pass

class citation(General, BackLinkable, Element, Labeled, Targetable):
    pass

class label(Part, TextElement):
    pass

# Figures.

class figure(General, Element):
    pass

class caption(Part, TextElement):
    pass

class legend(Part, Element):
    pass

# Tables.

class table(General, Element):
    pass

class tgroup(Part, Element):
    pass

class colspec(Part, Element):
    pass

class thead(Part, Element):
    pass

class tbody(Part, Element):
    pass

class row(Part, Element):
    pass

class entry(Part, Element):
    pass
class system_message(Special, BackLinkable, PreBibliographic, Element):

    """
    System message element.

    Do not instantiate this class directly; use
    ``document.reporter.info/warning/error/severe()`` instead.
    """

    def __init__(self, message=None, *children, **attributes):
        """
        Initialize the element.

        A true `message` is wrapped in a `paragraph` and placed in front of
        `children`.  If `Element.__init__()` raises, the children are
        written to `sys.stderr` and the exception is re-raised unchanged.
        """
        if message:
            p = paragraph('', message)
            children = (p,) + children
        try:
            Element.__init__(self, '', *children, **attributes)
        except:
            # Catch-all is deliberate: the exception is always re-raised.
            # The diagnostic goes to stderr so that it cannot end up in
            # document output written to stdout.
            sys.stderr.write('system_message: children=%r\n' % (children,))
            raise

    def astext(self):
        """
        Return "source:line: (type/level) text" for this message.

        "line" is optional and rendered as an empty string when missing;
        "source", "type" and "level" are read with item access.
        """
        line = self.get('line', '')
        return u'%s:%s: (%s/%s) %s' % (self['source'], line, self['type'],
                                       self['level'], Element.astext(self))
class pending(Special, Invisible, Element):

    """
    Placeholder for a pending operation.

    A "pending" element encapsulates the operation (transform), the point
    at which to apply it, and any data it requires.  Only the location of
    the pending operation is stored in the public document tree (by the
    "pending" object itself); the operation and its data live in the
    object's internal instance attributes.

    For example, say you want a table of contents in your reStructuredText
    document.  The easiest way to specify where to put it is from within
    the document, with a directive::

        .. contents::

    But the "contents" directive can't do its work until the entire
    document has been parsed and possibly transformed to some extent.  So
    the directive code leaves a placeholder behind that will trigger the
    second phase of its processing, something like this::

        <pending ...public attributes...> + internal attributes

    Use `document.note_pending()` so that the
    `docutils.transforms.Transformer` stage of processing can run all
    pending transforms.
    """

    def __init__(self, transform, details=None,
                 rawsource='', *children, **attributes):
        Element.__init__(self, rawsource, *children, **attributes)

        # The `docutils.transforms.Transform` class implementing the
        # pending operation.
        self.transform = transform

        # Detail data (dictionary) required by the pending operation.
        self.details = details or {}

    def pformat(self, indent=' ', level=0):
        """
        Return `Element.pformat()` output followed by the internal
        attributes (transform and details sorted by key), one per line.
        """
        # NOTE(review): whitespace inside the string literals of this
        # method (and the `indent` default) may have been collapsed when
        # the file was extracted -- confirm against the repository copy.
        transform = self.transform
        report = [
            '.. internal attributes:',
            ' .transform: %s.%s' % (transform.__module__,
                                    transform.__name__),
            ' .details:']
        for key, value in sorted(self.details.items()):
            if isinstance(value, Node):
                subtrees = [value]
            elif (value and isinstance(value, list)
                  and isinstance(value[0], Node)):
                subtrees = value
            else:
                # Plain value: show its repr on the key's own line.
                report.append('%7s%s: %r' % ('', key, value))
                continue
            report.append('%7s%s:' % ('', key))
            for subtree in subtrees:
                for text in subtree.pformat().splitlines():
                    report.append('%9s%s' % ('', text))
        own = Element.pformat(self, indent, level)
        margin = indent * level
        return own + ''.join([' %s%s\n' % (margin, text) for text in report])

    def copy(self):
        """Return a new instance with the same transform, details,
        rawsource and attributes."""
        cls = self.__class__
        return cls(self.transform, self.details, self.rawsource,
                   **self.attributes)
class raw(Special, Inline, PreBibliographic, FixedTextElement):

    """Raw data, to be handed to the Writer untouched."""
1493 # =================
1494 # Inline Elements
1495 # =================
class emphasis(Inline, TextElement):
    pass

class strong(Inline, TextElement):
    pass

class literal(Inline, TextElement):
    pass

class reference(General, Inline, Referential, TextElement):
    pass

class footnote_reference(Inline, Referential, TextElement):
    pass

class citation_reference(Inline, Referential, TextElement):
    pass

class substitution_reference(Inline, TextElement):
    pass

class title_reference(Inline, TextElement):
    pass

class abbreviation(Inline, TextElement):
    pass

class acronym(Inline, TextElement):
    pass

class superscript(Inline, TextElement):
    pass

class subscript(Inline, TextElement):
    pass

class math(Inline, TextElement):
    pass
class image(General, Inline, Element):

    def astext(self):
        """Return the "alt" attribute, or an empty string if it is unset."""
        alt_text = self.get('alt', '')
        return alt_text
class inline(Inline, TextElement):
    pass

class problematic(Inline, TextElement):
    pass

class generated(Inline, TextElement):
    pass
1523 # ========================================
1524 # Auxiliary Classes, Functions, and Data
1525 # ========================================
# A list of names of all concrete Node subclasses.  The rows are joined
# with single spaces and split on whitespace, giving one list entry per
# name in the order written here.
node_class_names = ' '.join((
    'Text',
    'abbreviation acronym address admonition attention attribution author',
    'authors',
    'block_quote bullet_list',
    'caption caution citation citation_reference classifier colspec comment',
    'compound contact container copyright',
    'danger date decoration definition definition_list definition_list_item',
    'description docinfo doctest_block document',
    'emphasis entry enumerated_list error',
    'field field_body field_list field_name figure footer',
    'footnote footnote_reference',
    'generated',
    'header hint',
    'image important inline',
    'label legend line line_block list_item literal literal_block',
    'math math_block',
    'note',
    'option option_argument option_group option_list option_list_item',
    'option_string organization',
    'paragraph pending problematic',
    'raw reference revision row rubric',
    'section sidebar status strong subscript substitution_definition',
    'substitution_reference subtitle superscript system_message',
    'table target tbody term tgroup thead tip title title_reference topic',
    'transition',
    'version',
    'warning',
)).split()
class NodeVisitor:

    """
    Abstract superclass implementing the "Visitor" pattern [GoF95]_ for
    document tree traversals.

    There is one pair of methods per node class, doing nothing by default;
    override individual methods for specific and useful behaviour.
    `Node.walk()` calls `dispatch_visit()` upon entering a node;
    `Node.walkabout()` additionally calls `dispatch_departure()` before
    exiting a node.

    The dispatch methods call "``visit_`` + node class name" or
    "``depart_`` + node class name", respectively.

    This is the base class for visitors whose ``visit_...`` &
    ``depart_...`` methods should be implemented for *all* node types
    encountered (such as for `docutils.writers.Writer` subclasses).
    Unimplemented methods will raise exceptions.

    For sparse traversals, where only certain node types are of interest,
    subclass `SparseNodeVisitor` instead.  When (mostly or entirely)
    uniform processing is desired, subclass `GenericNodeVisitor`.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements
       of Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA,
       USA, 1995.
    """

    # Tuple containing node class names (as strings).
    #
    # No exception will be raised if writers do not implement visit
    # or departure functions for these node classes.
    #
    # Used to ensure transitional compatibility with existing 3rd-party
    # writers.
    optional = ()

    def __init__(self, document):
        self.document = document

    def dispatch_visit(self, node):
        """
        Call self."``visit_`` + node class name" with `node` as
        parameter and return its result.  If the ``visit_...`` method does
        not exist, call self.unknown_visit.
        """
        node_name = node.__class__.__name__
        handler = getattr(self, 'visit_' + node_name, self.unknown_visit)
        trace = ('docutils.nodes.NodeVisitor.dispatch_visit calling %s for %s'
                 % (handler.__name__, node_name))
        self.document.reporter.debug(trace)
        return handler(node)

    def dispatch_departure(self, node):
        """
        Call self."``depart_`` + node class name" with `node` as
        parameter and return its result.  If the ``depart_...`` method
        does not exist, call self.unknown_departure.
        """
        node_name = node.__class__.__name__
        handler = getattr(self, 'depart_' + node_name,
                          self.unknown_departure)
        trace = (
            'docutils.nodes.NodeVisitor.dispatch_departure calling %s for %s'
            % (handler.__name__, node_name))
        self.document.reporter.debug(trace)
        return handler(node)

    def unknown_visit(self, node):
        """
        Called when entering unknown `Node` types.

        Raise NotImplementedError unless the "strict_visitor" setting is
        false and the node's class name is listed in `optional`.
        """
        if not self.document.settings.strict_visitor:
            if node.__class__.__name__ in self.optional:
                return
        raise NotImplementedError(
            '%s visiting unknown node type: %s'
            % (self.__class__, node.__class__.__name__))

    def unknown_departure(self, node):
        """
        Called before exiting unknown `Node` types.

        Raise NotImplementedError unless the "strict_visitor" setting is
        false and the node's class name is listed in `optional`.
        """
        if not self.document.settings.strict_visitor:
            if node.__class__.__name__ in self.optional:
                return
        raise NotImplementedError(
            '%s departing unknown node type: %s'
            % (self.__class__, node.__class__.__name__))
class SparseNodeVisitor(NodeVisitor):

    """
    Base class for sparse traversals, in which only certain node types are
    of interest.

    Subclass `NodeVisitor` instead when ``visit_...`` & ``depart_...``
    methods should be implemented for *all* node types (such as for
    `docutils.writers.Writer` subclasses).
    """
class GenericNodeVisitor(NodeVisitor):

    """
    Generic "Visitor" abstract superclass, for simple traversals.

    Unless overridden, every ``visit_...`` method calls `default_visit()`
    and every ``depart_...`` method (when using `Node.walkabout()`) calls
    `default_departure()`.  Subclasses must override `default_visit()`
    (and `default_departure()`).

    Override only `default_visit()` (and `default_departure()`) to define a
    fully generic visitor; additionally override individual
    ``visit_...()`` (and ``depart_...()``) methods for a semi-generic one.

    `NodeVisitor.unknown_visit()` (`NodeVisitor.unknown_departure()`)
    should be overridden for default behavior.
    """

    def default_visit(self, node):
        """Override for generic, uniform traversals."""
        raise NotImplementedError()

    def default_departure(self, node):
        """Override for generic, uniform traversals."""
        raise NotImplementedError()
1687 def _call_default_visit(self, node):
1688 self.default_visit(node)
1690 def _call_default_departure(self, node):
1691 self.default_departure(node)
1693 def _nop(self, node):
1694 pass
def _add_node_class_names(names):
    """
    Save typing with dynamic assignments.

    For every name, install ``visit_<name>`` / ``depart_<name>`` on
    `GenericNodeVisitor` (delegating to the default methods) and on
    `SparseNodeVisitor` (no-ops).
    """
    for _name in names:
        visit_attr = 'visit_' + _name
        depart_attr = 'depart_' + _name
        setattr(GenericNodeVisitor, visit_attr, _call_default_visit)
        setattr(GenericNodeVisitor, depart_attr, _call_default_departure)
        setattr(SparseNodeVisitor, visit_attr, _nop)
        setattr(SparseNodeVisitor, depart_attr, _nop)

_add_node_class_names(node_class_names)
class TreeCopyVisitor(GenericNodeVisitor):

    """
    Make a complete copy of a tree or branch, including element attributes.
    """

    def __init__(self, document):
        GenericNodeVisitor.__init__(self, document)
        # Saved acting parents, innermost last.
        self.parent_stack = []
        # Acting parent; starts as a plain list that receives the copy of
        # the first node visited.
        self.parent = []

    def get_tree_copy(self):
        """Return the first item of the current acting parent."""
        return self.parent[0]

    def default_visit(self, node):
        """Copy the current node, and make it the new acting parent."""
        clone = node.copy()
        acting_parent = self.parent
        acting_parent.append(clone)
        self.parent_stack.append(acting_parent)
        self.parent = clone

    def default_departure(self, node):
        """Restore the previous acting parent."""
        previous = self.parent_stack.pop()
        self.parent = previous
class TreePruningException(Exception):

    """
    Base class for `NodeVisitor`-related tree pruning exceptions.

    Subclasses are raised from within ``visit_...`` or ``depart_...``
    methods called from `Node.walk()` and `Node.walkabout()` tree
    traversals, in order to prune the tree traversed.
    """


class SkipChildren(TreePruningException):

    """
    Do not visit any children of the current node.  The current node's
    siblings and ``depart_...`` method are not affected.
    """


class SkipSiblings(TreePruningException):

    """
    Do not visit any more siblings (to the right) of the current node.  The
    current node's children and its ``depart_...`` method are not affected.
    """


class SkipNode(TreePruningException):

    """
    Do not visit the current node's children, and do not call the current
    node's ``depart_...`` method.
    """


class SkipDeparture(TreePruningException):

    """
    Do not call the current node's ``depart_...`` method.  The current
    node's children and siblings are not affected.
    """


class NodeFound(TreePruningException):

    """
    Raise to indicate that the target of a search has been found.  This
    exception must be caught by the client; it is not caught by the
    traversal code.
    """


class StopTraversal(TreePruningException):

    """
    Stop the traversal altogether.  The current node's ``depart_...``
    method is not affected.  The parent nodes' ``depart_...`` methods are
    also called as usual.  No other nodes are visited.  This is an
    alternative to NodeFound that does not cause exception handling to
    trickle up to the caller.
    """
def make_id(string):
    """
    Convert `string` into an identifier and return it.

    Docutils identifiers will conform to the regular expression
    ``[a-z](-?[a-z0-9]+)*``.  For CSS compatibility, identifiers (the
    "class" and "id" attributes) should have no underscores, colons, or
    periods.  Hyphens may be used.

    - The `HTML 4.01 spec`_ defines identifiers based on SGML tokens:

          ID and NAME tokens must begin with a letter ([A-Za-z]) and may be
          followed by any number of letters, digits ([0-9]), hyphens ("-"),
          underscores ("_"), colons (":"), and periods (".").

    - However the `CSS1 spec`_ defines identifiers based on the "name"
      token, a tighter interpretation ("flex" tokenizer notation; "latin1"
      and "escape" 8-bit characters have been replaced with entities)::

          unicode     \\[0-9a-f]{1,4}
          latin1      [&iexcl;-&yuml;]
          escape      {unicode}|\\[ -~&iexcl;-&yuml;]
          nmchar      [-a-z0-9]|{latin1}|{escape}
          name        {nmchar}+

    The CSS1 "nmchar" rule does not include underscores ("_"), colons
    (":"), or periods ("."), therefore "class" and "id" attributes should
    not contain these characters.  They should be replaced with hyphens
    ("-").  Combined with HTML's requirements (the first character must be
    a letter; no "unicode", "latin1", or "escape" characters), this results
    in the ``[a-z](-?[a-z0-9]+)*`` pattern.

    .. _HTML 4.01 spec: http://www.w3.org/TR/html401
    .. _CSS1 spec: http://www.w3.org/TR/REC-CSS1
    """
    ident = string.lower()
    if not isinstance(ident, unicode):
        ident = ident.decode()
    # Digraph replacements first, then the single-character table.
    for table in (_non_id_translate_digraphs, _non_id_translate):
        ident = ident.translate(table)
    # get rid of non-ascii characters.
    # 'ascii' lowercase to prevent problems with turkish locale.
    decomposed = unicodedata.normalize('NFKD', ident)
    ident = decomposed.encode('ascii', 'ignore').decode('ascii')
    # shrink runs of whitespace and replace by hyphen
    single_spaced = ' '.join(ident.split())
    ident = _non_id_chars.sub('-', single_spaced)
    ident = _non_id_at_ends.sub('', ident)
    return str(ident)
1859 _non_id_chars = re.compile('[^a-z0-9]+')
1860 _non_id_at_ends = re.compile('^[-0-9]+|-+$')
1861 _non_id_translate = {
1862 0x00f8: u'o', # o with stroke
1863 0x0111: u'd', # d with stroke
1864 0x0127: u'h', # h with stroke
1865 0x0131: u'i', # dotless i
1866 0x0142: u'l', # l with stroke
1867 0x0167: u't', # t with stroke
1868 0x0180: u'b', # b with stroke
1869 0x0183: u'b', # b with topbar
1870 0x0188: u'c', # c with hook
1871 0x018c: u'd', # d with topbar
1872 0x0192: u'f', # f with hook
1873 0x0199: u'k', # k with hook
1874 0x019a: u'l', # l with bar
1875 0x019e: u'n', # n with long right leg
1876 0x01a5: u'p', # p with hook
1877 0x01ab: u't', # t with palatal hook
1878 0x01ad: u't', # t with hook
1879 0x01b4: u'y', # y with hook
1880 0x01b6: u'z', # z with stroke
1881 0x01e5: u'g', # g with stroke
1882 0x0225: u'z', # z with hook
1883 0x0234: u'l', # l with curl
1884 0x0235: u'n', # n with curl
1885 0x0236: u't', # t with curl
1886 0x0237: u'j', # dotless j
1887 0x023c: u'c', # c with stroke
1888 0x023f: u's', # s with swash tail
1889 0x0240: u'z', # z with swash tail
1890 0x0247: u'e', # e with stroke
1891 0x0249: u'j', # j with stroke
1892 0x024b: u'q', # q with hook tail
1893 0x024d: u'r', # r with stroke
1894 0x024f: u'y', # y with stroke
1896 _non_id_translate_digraphs = {
1897 0x00df: u'sz', # ligature sz
1898 0x00e6: u'ae', # ae
1899 0x0153: u'oe', # ligature oe
1900 0x0238: u'db', # db digraph
1901 0x0239: u'qp', # qp digraph
def dupname(node, name):
    """
    Mark `name` as a duplicate name of `node`.

    `name` is appended to ``node['dupnames']`` and then removed from
    ``node['names']``; it must be present in ``node['names']``, as
    ``remove()`` is called unconditionally.
    """
    node['dupnames'].append(name)
    node['names'].remove(name)
    # Assume that `node` is referenced, even though it isn't; we
    # don't want to throw unnecessary system_messages.
    node.referenced = True
def fully_normalize_name(name):
    """Return a case- and whitespace-normalized name."""
    words = name.lower().split()
    return ' '.join(words)
def whitespace_normalize_name(name):
    """Return a whitespace-normalized name."""
    words = name.split()
    return ' '.join(words)
def serial_escape(value):
    """Escape string values that are elements of a list, for serialization."""
    # Backslashes first, so those added for spaces are not doubled.
    escaped = value.replace('\\', r'\\')
    escaped = escaped.replace(' ', r'\ ')
    return escaped
def pseudo_quoteattr(value):
    """Quote attributes for pseudo-xml"""
    template = '"%s"'
    return template % value
1927 # \f
1929 # Local Variables:
1930 # indent-tabs-mode: nil
1931 # sentence-end-double-space: t
1932 # fill-column: 78
1933 # End: