SmartQuotes transform: language-dependent quote characters.
[docutils.git] / docutils / nodes.py
blob5f4d10d5397b4d8d952210d01efc6c1c00ebb73f
1 # $Id$
2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
5 """
6 Docutils document tree element class library.
8 Classes in CamelCase are abstract base classes or auxiliary classes. The one
9 exception is `Text`, for a text (PCDATA) node; uppercase is used to
10 differentiate from element classes. Classes in lower_case_with_underscores
11 are element classes, matching the XML element generic identifiers in the DTD_.
13 The position of each node (the level at which it can occur) is significant and
14 is represented by abstract base classes (`Root`, `Structural`, `Body`,
15 `Inline`, etc.). Certain transformations will be easier because we can use
16 ``isinstance(node, base_class)`` to determine the position of the node in the
17 hierarchy.
19 .. _DTD: http://docutils.sourceforge.net/docs/ref/docutils.dtd
20 """
22 __docformat__ = 'reStructuredText'
24 import sys
25 import os
26 import re
27 import warnings
28 import types
29 import unicodedata
31 # ==============================
32 # Functional Node Base Classes
33 # ==============================
class Node(object):

    """Abstract base class of nodes in a document tree."""

    parent = None
    """The Node that directly contains this Node (back-reference)."""

    document = None
    """The `document` node forming the root of this Node's tree."""

    source = None
    """Path or description of the input source this Node came from."""

    line = None
    """1-based line number in `source` where this Node begins."""

    def __nonzero__(self):
        """
        Always report true, even for a node without children.

        A node is more than a plain container, so its truth value is
        independent of the number of subnodes.  Use `len()` to test for
        children and `None` to express a false value.
        """
        return True

    if sys.version_info < (3,):
        # Python 2 only: ``str(node)`` has to be a byte string, so the
        # Unicode characters > 255 are escaped.  Not needed on 3.x.
        def __str__(self):
            text = unicode(self)
            return text.encode('raw_unicode_escape')

    def asdom(self, dom=None):
        """Return a DOM **fragment** representation of this Node."""
        if dom is None:
            import xml.dom.minidom as dom
        return self._dom_node(dom.Document())

    def pformat(self, indent='    ', level=0):
        """
        Return an indented pseudo-XML representation, for test purposes.

        Subclasses must override this; the base class only raises.
        """
        raise NotImplementedError

    def copy(self):
        """Return a copy of self (to be provided by subclasses)."""
        raise NotImplementedError

    def deepcopy(self):
        """Return a deep copy of self, children included (to be provided
        by subclasses)."""
        raise NotImplementedError

    def setup_child(self, child):
        """
        Adopt `child`: set its parent and, if this node belongs to a
        document, its document plus any still-missing source/line info.
        """
        child.parent = self
        if not self.document:
            return
        child.document = self.document
        if child.source is None:
            child.source = self.document.current_source
        if child.line is None:
            child.line = self.document.current_line

    def walk(self, visitor):
        """
        Traverse a tree of `Node` objects, calling the
        `dispatch_visit()` method of `visitor` when entering each
        node.  (The `walkabout()` method is similar, except it also
        calls the `dispatch_departure()` method before exiting each
        node.)

        This tree traversal supports limited in-place tree
        modifications.  Replacing one node with one or more nodes is
        OK, as is removing an element.  However, if the node removed
        or replaced occurs after the current node, the old node will
        still be traversed, and any new nodes will not.

        Within ``visit`` methods (and ``depart`` methods for
        `walkabout()`), `TreePruningException` subclasses may be raised
        (`SkipChildren`, `SkipSiblings`, `SkipNode`, `SkipDeparture`).

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` implementation for each `Node` subclass encountered.

        Return true if we should stop the traversal.
        """
        halted = False
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walk calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except (SkipChildren, SkipNode):
                return halted
            except SkipDeparture:
                # walk() never calls dispatch_departure; nothing to skip.
                pass
            # Iterate over a snapshot so visitors may edit the child list.
            snapshot = self.children[:]
            try:
                for child in snapshot:
                    if child.walk(visitor):
                        halted = True
                        break
            except SkipSiblings:
                pass
        except StopTraversal:
            halted = True
        return halted

    def walkabout(self, visitor):
        """
        Perform a tree traversal similarly to `Node.walk()` (which
        see), except also call the `dispatch_departure()` method
        before exiting each node.

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` and ``depart`` implementation for each `Node`
        subclass encountered.

        Return true if we should stop the traversal.
        """
        departure_wanted = True
        halted = False
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walkabout calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except SkipNode:
                return halted
            except SkipDeparture:
                departure_wanted = False
            # Iterate over a snapshot so visitors may edit the child list.
            snapshot = self.children[:]
            try:
                for child in snapshot:
                    if child.walkabout(visitor):
                        halted = True
                        break
            except SkipSiblings:
                pass
        except SkipChildren:
            pass
        except StopTraversal:
            halted = True
        if departure_wanted:
            visitor.document.reporter.debug(
                'docutils.nodes.Node.walkabout calling dispatch_departure '
                'for %s' % self.__class__.__name__)
            visitor.dispatch_departure(self)
        return halted

    def _fast_traverse(self, cls):
        """Specialized traverse() that only supports instance checks."""
        matches = []
        if isinstance(self, cls):
            matches.append(self)
        for child in self.children:
            matches += child._fast_traverse(cls)
        return matches

    def _all_traverse(self):
        """Specialized traverse() that doesn't check for a condition."""
        nodes = [self]
        for child in self.children:
            nodes += child._all_traverse()
        return nodes

    def traverse(self, condition=None, include_self=True, descend=True,
                 siblings=False, ascend=False):
        """
        Return an iterable containing

        * self (if include_self is true)
        * all descendants in tree traversal order (if descend is true)
        * all siblings (if siblings is true) and their descendants (if
          also descend is true)
        * the siblings of the parent (if ascend is true) and their
          descendants (if also descend is true), and so on

        If `condition` is not None, the iterable contains only nodes
        for which ``condition(node)`` is true.  If `condition` is a
        node class ``cls``, it is equivalent to a function consisting
        of ``return isinstance(node, cls)``.

        If ascend is true, assume siblings to be true as well.

        For example, given the following tree::

            <paragraph>
                <emphasis>      <--- emphasis.traverse() and
                    <strong>    <--- strong.traverse() are called.
                        Foo
                    Bar
                <reference name="Baz" refid="baz">
                    Baz

        Then list(emphasis.traverse()) equals ::

            [<emphasis>, <strong>, <#text: Foo>, <#text: Bar>]

        and list(strong.traverse(ascend=True)) equals ::

            [<strong>, <#text: Foo>, <#text: Bar>, <reference>, <#text: Baz>]
        """
        if ascend:
            siblings = True
        # Certain argument combinations can be served by one of the
        # optimized helpers.
        if include_self and descend and not siblings:
            if condition is None:
                return self._all_traverse()
            elif isinstance(condition, (types.ClassType, type)):
                return self._fast_traverse(condition)
        # A class given as `condition` is turned into an isinstance test
        # (`type` is checked as well for implementations that only have
        # new-style classes, like PyPy).
        if isinstance(condition, (types.ClassType, type)):
            wanted_class = condition
            def condition(node, wanted_class=wanted_class):
                return isinstance(node, wanted_class)
        collected = []
        if include_self:
            if condition is None or condition(self):
                collected.append(self)
        if descend and self.children:
            for child in self:
                collected.extend(child.traverse(
                    condition=condition, include_self=True, descend=True,
                    siblings=False, ascend=False))
        # `ascend` implies `siblings` (set above), so one test suffices.
        if siblings:
            current = self
            while current.parent:
                container = current.parent
                position = container.index(current)
                for sibling in container[position+1:]:
                    collected.extend(sibling.traverse(
                        condition=condition, include_self=True,
                        descend=descend, siblings=False, ascend=False))
                if not ascend:
                    break
                current = container
        return collected

    def next_node(self, condition=None, include_self=False, descend=True,
                  siblings=False, ascend=False):
        """
        Return the first node in the iterable returned by traverse(),
        or None if the iterable is empty.

        Parameter list is the same as of traverse.  Note that
        include_self defaults to 0, though.
        """
        candidates = self.traverse(condition=condition,
                                   include_self=include_self,
                                   descend=descend, siblings=siblings,
                                   ascend=ascend)
        try:
            first = candidates[0]
        except IndexError:
            return None
        return first
298 if sys.version_info < (3,):
299 class reprunicode(unicode):
301 A class that removes the initial u from unicode's repr.
304 def __repr__(self):
305 return unicode.__repr__(self)[1:]
306 else:
307 reprunicode = unicode
class Text(Node, reprunicode):

    """
    Instances are terminal nodes (leaves) containing text only; no child
    nodes or attributes.  Initialize by passing a string to the constructor.
    Access the text itself with the `astext` method.
    """

    tagname = '#text'

    children = ()
    """Text nodes have no children, and cannot have children."""

    def __new__(cls, data, rawsource=None):
        """Prevent the rawsource argument from propagating to str."""
        # Byte strings are only rejected on Python 3.
        if sys.version_info > (3,) and isinstance(data, bytes):
            raise TypeError('expecting str data, not bytes')
        return reprunicode.__new__(cls, data)

    def __init__(self, data, rawsource=''):

        self.rawsource = rawsource
        """The raw text from which this element was constructed."""

    def shortrepr(self, maxlen=18):
        """Return ``<#text: '...'>``, abbreviating text longer than
        `maxlen` characters."""
        shown = self
        if len(self) > maxlen:
            shown = self[:maxlen-4] + ' ...'
        return '<%s: %s>' % (self.tagname, repr(reprunicode(shown)))

    def __repr__(self):
        return self.shortrepr(maxlen=68)

    def _dom_node(self, domroot):
        """Return a DOM text node created by `domroot`."""
        return domroot.createTextNode(unicode(self))

    def astext(self):
        """Return the text as a plain `reprunicode` string."""
        return reprunicode(self)

    # Note about __unicode__: The implementation of __unicode__ here,
    # and the one raising NotImplemented in the superclass Node had
    # to be removed when changing Text to a subclass of unicode instead
    # of UserString, since there is no way to delegate the __unicode__
    # call to the superclass unicode:
    # unicode itself does not have __unicode__ method to delegate to
    # and calling unicode(self) or unicode.__new__ directly creates
    # an infinite loop

    def copy(self):
        """Return a new node with the same text and `rawsource`."""
        text = reprunicode(self)
        return self.__class__(text, rawsource=self.rawsource)

    def deepcopy(self):
        """Same as `copy()`; a Text node has no children to copy."""
        return self.copy()

    def pformat(self, indent='    ', level=0):
        """Return the text, each line prefixed with `indent` repeated
        `level` times and terminated by a newline."""
        prefix = indent * level
        return ''.join([prefix + line + '\n'
                        for line in self.splitlines()])

    # rstrip and lstrip are used by substitution definitions where
    # they are expected to return a Text instance, this was formerly
    # taken care of by UserString. Note that then and now the
    # rawsource member is lost.

    def rstrip(self, chars=None):
        stripped = reprunicode.rstrip(self, chars)
        return self.__class__(stripped)

    def lstrip(self, chars=None):
        stripped = reprunicode.lstrip(self, chars)
        return self.__class__(stripped)
class Element(Node):

    """
    `Element` is the superclass to all specific elements.

    Elements contain attributes and child nodes.  Elements emulate
    dictionaries for attributes, indexing by attribute name (a string).  To
    set the attribute 'att' to 'value', do::

        element['att'] = 'value'

    There are two special attributes: 'ids' and 'names'.  Both are
    lists of unique identifiers, and names serve as human interfaces
    to IDs.  Names are case- and whitespace-normalized (see the
    fully_normalize_name() function), and IDs conform to the regular
    expression ``[a-z](-?[a-z0-9]+)*`` (see the make_id() function).

    Elements also emulate lists for child nodes (element nodes and/or text
    nodes), indexing by integer.  To get the first child node, use::

        element[0]

    Elements may be constructed using the ``+=`` operator.  To add one new
    child node to element, do::

        element += node

    This is equivalent to ``element.append(node)``.

    To add a list of multiple child nodes at once, use the same ``+=``
    operator::

        element += [node1, node2]

    This is equivalent to ``element.extend([node1, node2])``.
    """

    list_attributes = ('ids', 'classes', 'names', 'dupnames', 'backrefs')
    """List attributes, automatically initialized to empty lists for
    all nodes."""

    tagname = None
    """The element generic identifier. If None, it is set as an instance
    attribute to the name of the class."""

    child_text_separator = '\n\n'
    """Separator for child nodes, used by `astext()` method."""

    def __init__(self, rawsource='', *children, **attributes):
        """
        Create an element from `rawsource`, child nodes and attributes.

        Attribute names are lower-cased.  Values given for one of the
        `list_attributes` are copied with ``value[:]`` so that the caller's
        list is not shared with this node.
        """
        self.rawsource = rawsource
        """The raw text from which this element was constructed."""

        self.children = []
        """List of child nodes (elements and/or `Text`)."""

        self.extend(children)           # maintain parent info

        self.attributes = {}
        """Dictionary of attribute {name: value}."""

        # Initialize list attributes.
        for att in self.list_attributes:
            self.attributes[att] = []

        for att, value in attributes.items():
            att = att.lower()
            if att in self.list_attributes:
                # mutable list; make a copy for this node
                self.attributes[att] = value[:]
            else:
                self.attributes[att] = value

        if self.tagname is None:
            self.tagname = self.__class__.__name__

    def _dom_node(self, domroot):
        """Return a DOM element (with attributes and children) created
        by `domroot`."""
        element = domroot.createElement(self.tagname)
        for attribute, value in self.attlist():
            if isinstance(value, list):
                value = ' '.join([serial_escape('%s' % (v,)) for v in value])
            element.setAttribute(attribute, '%s' % value)
        for child in self.children:
            element.appendChild(child._dom_node(domroot))
        return element

    def __repr__(self):
        data = ''
        for c in self.children:
            data += c.shortrepr()
            if len(data) > 60:
                # Abbreviate long child listings.
                data = data[:56] + ' ...'
                break
        if self['names']:
            return '<%s "%s": %s>' % (self.__class__.__name__,
                                      '; '.join(self['names']), data)
        else:
            return '<%s: %s>' % (self.__class__.__name__, data)

    def shortrepr(self):
        """Return a short representation that omits the children."""
        if self['names']:
            return '<%s "%s"...>' % (self.__class__.__name__,
                                     '; '.join(self['names']))
        else:
            return '<%s...>' % self.tagname

    def __unicode__(self):
        if self.children:
            return u'%s%s%s' % (self.starttag(),
                                ''.join([unicode(c) for c in self.children]),
                                self.endtag())
        else:
            return self.emptytag()

    if sys.version_info > (3,):
        # 2to3 doesn't convert __unicode__ to __str__
        __str__ = __unicode__

    def starttag(self, quoteattr=None):
        """
        Return the pseudo-XML start tag, including non-default attributes.

        `quoteattr` is the function used to quote attribute values; it
        defaults to `pseudo_quoteattr`.
        """
        # the optional arg is used by the docutils_xml writer
        if quoteattr is None:
            quoteattr = pseudo_quoteattr
        parts = [self.tagname]
        for name, value in self.attlist():
            if value is None:           # boolean attribute
                parts.append(name)
                continue
            if isinstance(value, list):
                values = [serial_escape('%s' % (v,)) for v in value]
                value = ' '.join(values)
            else:
                value = unicode(value)
            value = quoteattr(value)
            parts.append(u'%s=%s' % (name, value))
        return u'<%s>' % u' '.join(parts)

    def endtag(self):
        """Return the pseudo-XML end tag."""
        return '</%s>' % self.tagname

    def emptytag(self):
        """Return the pseudo-XML empty-element tag with the non-default
        attributes."""
        return u'<%s/>' % u' '.join([self.tagname] +
                                    ['%s="%s"' % (n, v)
                                     for n, v in self.attlist()])

    def __len__(self):
        return len(self.children)

    def __contains__(self, key):
        """
        Support the ``in`` operator for attributes and for children.

        A string key is looked up among the attribute names; any other
        key is looked up in the list of children.
        """
        # support both membership test for children and attributes
        # (has_key is translated to "in" by 2to3)
        if isinstance(key, basestring):
            return key in self.attributes
        return key in self.children

    def __getitem__(self, key):
        """Return an attribute (string key), a child (integer key) or a
        list of children (simple slice)."""
        if isinstance(key, basestring):
            return self.attributes[key]
        elif isinstance(key, int):
            return self.children[key]
        elif isinstance(key, types.SliceType):
            assert key.step in (None, 1), 'cannot handle slice with stride'
            return self.children[key.start:key.stop]
        else:
            raise TypeError('element index must be an integer, a slice, or '
                            'an attribute name string')

    def __setitem__(self, key, item):
        """Set an attribute (string key) or replace a child / a slice of
        children; new children are passed through `setup_child()`."""
        if isinstance(key, basestring):
            self.attributes[str(key)] = item
        elif isinstance(key, int):
            self.setup_child(item)
            self.children[key] = item
        elif isinstance(key, types.SliceType):
            assert key.step in (None, 1), 'cannot handle slice with stride'
            for node in item:
                self.setup_child(node)
            self.children[key.start:key.stop] = item
        else:
            raise TypeError('element index must be an integer, a slice, or '
                            'an attribute name string')

    def __delitem__(self, key):
        """Delete an attribute (string key), a child (integer key) or a
        simple slice of children."""
        if isinstance(key, basestring):
            del self.attributes[key]
        elif isinstance(key, int):
            del self.children[key]
        elif isinstance(key, types.SliceType):
            assert key.step in (None, 1), 'cannot handle slice with stride'
            del self.children[key.start:key.stop]
        else:
            raise TypeError('element index must be an integer, a simple '
                            'slice, or an attribute name string')

    def __add__(self, other):
        return self.children + other

    def __radd__(self, other):
        return other + self.children

    def __iadd__(self, other):
        """Append a node or a list of nodes to `self.children`."""
        if isinstance(other, Node):
            self.append(other)
        elif other is not None:
            self.extend(other)
        return self

    def astext(self):
        """Return the children's text joined by `child_text_separator`."""
        return self.child_text_separator.join(
              [child.astext() for child in self.children])

    def non_default_attributes(self):
        """Return a dict of the attributes for which `is_not_default()`
        is true."""
        atts = {}
        for key, value in self.attributes.items():
            if self.is_not_default(key):
                atts[key] = value
        return atts

    def attlist(self):
        """Return the non-default attributes as a sorted list of
        ``(name, value)`` pairs."""
        return sorted(self.non_default_attributes().items())

    def get(self, key, failobj=None):
        """Return attribute `key`, or `failobj` if it is not set."""
        return self.attributes.get(key, failobj)

    def hasattr(self, attr):
        """Return true if attribute `attr` is set."""
        return attr in self.attributes

    def delattr(self, attr):
        """Delete attribute `attr` if it is set."""
        if attr in self.attributes:
            del self.attributes[attr]

    def setdefault(self, key, failobj=None):
        """Return attribute `key`, first setting it to `failobj` if it
        is missing."""
        return self.attributes.setdefault(key, failobj)

    has_key = hasattr

    # NOTE: the ``in`` operator is implemented by `__contains__()` above.
    # It must not be aliased to `hasattr` here: the alias would replace
    # that method and break membership tests for child nodes.

    def get_language_code(self, fallback=''):
        """Return node's language tag.

        Look iteratively in self and parents for a class argument
        starting with ``language-`` and return the remainder of it
        (which should be a `BCP 47` language tag) or the `fallback`.
        """
        for cls in self.get('classes', []):
            if cls.startswith('language-'):
                return cls[9:]
        try:
            parent_lookup = self.parent.get_language_code
        except AttributeError:
            # No parent (``None``) or a parent without this method.
            return fallback
        return parent_lookup(fallback)

    def append(self, item):
        """Add `item` as the last child."""
        self.setup_child(item)
        self.children.append(item)

    def extend(self, item):
        """Append every node of the iterable `item`."""
        for node in item:
            self.append(node)

    def insert(self, index, item):
        """Insert a node, or the nodes of a list, before position
        `index`; ``None`` is ignored."""
        if isinstance(item, Node):
            self.setup_child(item)
            self.children.insert(index, item)
        elif item is not None:
            self[index:index] = item

    def pop(self, i=-1):
        """Remove and return the child at index `i` (default: last)."""
        return self.children.pop(i)

    def remove(self, item):
        """Remove child `item`."""
        self.children.remove(item)

    def index(self, item):
        """Return the index of child `item`."""
        return self.children.index(item)

    def is_not_default(self, key):
        """Return 0 if attribute `key` is an empty list attribute,
        else 1."""
        if self[key] == [] and key in self.list_attributes:
            return 0
        else:
            return 1

    def update_basic_atts(self, dict):
        """
        Update basic attributes ('ids', 'names', 'classes',
        'dupnames', but not 'source') from node or dictionary `dict`.
        """
        if isinstance(dict, Node):
            dict = dict.attributes
        for att in ('ids', 'classes', 'names', 'dupnames'):
            for value in dict.get(att, []):
                if value not in self[att]:
                    self[att].append(value)

    def clear(self):
        """Drop all children."""
        self.children = []

    def replace(self, old, new):
        """Replace one child `Node` with another child or children."""
        index = self.index(old)
        if isinstance(new, Node):
            self.setup_child(new)
            self[index] = new
        elif new is not None:
            self[index:index+1] = new

    def replace_self(self, new):
        """
        Replace `self` node with `new`, where `new` is a node or a
        list of nodes.
        """
        update = new
        if not isinstance(new, Node):
            # `new` is a list; update first child.
            try:
                update = new[0]
            except IndexError:
                update = None
        if isinstance(update, Element):
            update.update_basic_atts(self)
        else:
            # `update` is a Text node or `new` is an empty list.
            # Assert that we aren't losing any attributes.
            for att in ('ids', 'names', 'classes', 'dupnames'):
                assert not self[att], \
                       'Losing "%s" attribute: %s' % (att, self[att])
        self.parent.replace(self, new)

    def first_child_matching_class(self, childclass, start=0, end=sys.maxint):
        """
        Return the index of the first child that is an instance of
        `childclass` (``isinstance`` test), or None if there is none.

        Parameters:

        - `childclass`: A `Node` subclass to search for, or a tuple of `Node`
          classes. If a tuple, any of the classes may match.
        - `start`: Initial index to check.
        - `end`: Initial index to *not* check.
        """
        if not isinstance(childclass, tuple):
            childclass = (childclass,)
        for index in range(start, min(len(self), end)):
            for c in childclass:
                if isinstance(self[index], c):
                    return index
        return None

    def first_child_not_matching_class(self, childclass, start=0,
                                       end=sys.maxint):
        """
        Return the index of the first child whose class does *not* match,
        or None if every checked child matches.

        Parameters:

        - `childclass`: A `Node` subclass to skip, or a tuple of `Node`
          classes. If a tuple, none of the classes may match.
        - `start`: Initial index to check.
        - `end`: Initial index to *not* check.
        """
        if not isinstance(childclass, tuple):
            childclass = (childclass,)
        for index in range(start, min(len(self), end)):
            for c in childclass:
                if isinstance(self.children[index], c):
                    break
            else:
                return index
        return None

    def pformat(self, indent='    ', level=0):
        """Return the indented start tag followed by the children's
        `pformat()` output one level deeper."""
        return ''.join(['%s%s\n' % (indent * level, self.starttag())] +
                       [child.pformat(indent, level+1)
                        for child in self.children])

    def copy(self):
        """Return a copy with the same `rawsource` and attributes but
        without children."""
        return self.__class__(rawsource=self.rawsource, **self.attributes)

    def deepcopy(self):
        """Return a copy including deep copies of all children."""
        copy = self.copy()
        copy.extend([child.deepcopy() for child in self.children])
        return copy

    def set_class(self, name):
        """Add a new class to the "classes" attribute."""
        warnings.warn('docutils.nodes.Element.set_class deprecated; '
                      "append to Element['classes'] list attribute directly",
                      DeprecationWarning, stacklevel=2)
        assert ' ' not in name
        self['classes'].append(name.lower())

    def note_referenced_by(self, name=None, id=None):
        """Note that this Element has been referenced by its name
        `name` or id `id`."""
        self.referenced = 1
        # Element.expect_referenced_by_* dictionaries map names or ids
        # to nodes whose ``referenced`` attribute is set to true as
        # soon as this node is referenced by the given name or id.
        # Needed for target propagation.
        by_name = getattr(self, 'expect_referenced_by_name', {}).get(name)
        by_id = getattr(self, 'expect_referenced_by_id', {}).get(id)
        if by_name:
            assert name is not None
            by_name.referenced = 1
        if by_id:
            assert id is not None
            by_id.referenced = 1
class TextElement(Element):

    """
    An element which directly contains text.

    Its children are all `Text` or `Inline` subclass nodes.  You can
    check whether an element's context is inline simply by checking whether
    its immediate parent is a `TextElement` instance (including subclasses).
    This is handy for nodes like `image` that can appear both inline and as
    standalone body elements.

    If passing children to `__init__()`, make sure to set `text` to
    ``''`` or some other suitable value.
    """

    child_text_separator = ''
    """Separator for child nodes, used by `astext()` method."""

    def __init__(self, rawsource='', text='', *children, **attributes):
        # A non-empty `text` becomes the first child, wrapped in `Text`.
        if text != '':
            children = (Text(text),) + children
        Element.__init__(self, rawsource, *children, **attributes)
class FixedTextElement(TextElement):

    """An element which directly contains preformatted text."""

    def __init__(self, rawsource='', text='', *children, **attributes):
        # Build the element as usual, then mark its whitespace as
        # significant through the ``xml:space`` attribute.
        TextElement.__init__(self, rawsource, text, *children, **attributes)
        self.attributes.update({'xml:space': 'preserve'})
833 # ========
834 # Mixins
835 # ========
# Mixin supplying a class-level ``resolved`` flag that starts out as 0.
class Resolvable:

    resolved = 0
# Mixin for nodes that keep a 'backrefs' list attribute.
class BackLinkable:

    def add_backref(self, refid):
        # Record `refid` at the end of this node's 'backrefs' list.
        backrefs = self['backrefs']
        backrefs.append(refid)
848 # ====================
849 # Element Categories
850 # ====================
# Marker classes without behaviour of their own.

class Root:
    pass


class Titular:
    pass


class PreBibliographic:
    """Category of Node which may occur before Bibliographic Nodes."""


class Bibliographic:
    pass


class Decorative(PreBibliographic):
    pass


class Structural:
    pass


class Body:
    pass


class General(Body):
    pass


class Sequential(Body):
    """List-like elements."""


class Admonition(Body):
    pass


class Special(Body):
    """Special internal body elements."""


class Invisible(PreBibliographic):
    """Internal elements that don't appear in output."""


class Part:
    pass


class Inline:
    pass
# Marker class; it only inherits the `Resolvable` mixin.
class Referential(Resolvable):
    pass
class Targetable(Resolvable):

    # Class-level default; starts out as 0.
    referenced = 0

    indirect_reference_name = None
    """Holds the whitespace_normalized_name (contains mixed case) of a target.
    Required for MoinMoin/reST compatibility."""
# Marker class without behaviour of its own.
class Labeled:

    """Contains a `label` as its first element."""
900 # ==============
901 # Root Element
902 # ==============
904 class document(Root, Structural, Element):
907 The document root element.
909 Do not instantiate this class directly; use
910 `docutils.utils.new_document()` instead.
def __init__(self, settings, reporter, *args, **kwargs):
    # The generic element set-up runs first; the document-wide
    # bookkeeping below is added afterwards.
    Element.__init__(self, *args, **kwargs)

    self.current_source = None
    """Path to or description of the input source being processed."""

    self.current_line = None
    """Line number (1-based) of `current_source`."""

    self.settings = settings
    """Runtime settings data record."""

    self.reporter = reporter
    """System message generator."""

    self.indirect_targets = []
    """List of indirect target nodes."""

    self.substitution_defs = {}
    """Mapping of substitution names to substitution_definition nodes."""

    self.substitution_names = {}
    """Mapping of case-normalized substitution names to case-sensitive
    names."""

    self.refnames = {}
    """Mapping of names to lists of referencing nodes."""

    self.refids = {}
    """Mapping of ids to lists of referencing nodes."""

    self.nameids = {}
    """Mapping of names to unique id's."""

    self.nametypes = {}
    """Mapping of names to hyperlink type (boolean: True => explicit,
    False => implicit)."""

    self.ids = {}
    """Mapping of ids to nodes."""

    self.footnote_refs = {}
    """Mapping of footnote labels to lists of footnote_reference nodes."""

    self.citation_refs = {}
    """Mapping of citation labels to lists of citation_reference nodes."""

    self.autofootnotes = []
    """List of auto-numbered footnote nodes."""

    self.autofootnote_refs = []
    """List of auto-numbered footnote_reference nodes."""

    self.symbol_footnotes = []
    """List of symbol footnote nodes."""

    self.symbol_footnote_refs = []
    """List of symbol footnote_reference nodes."""

    self.footnotes = []
    """List of manually-numbered footnote nodes."""

    self.citations = []
    """List of citation nodes."""

    self.autofootnote_start = 1
    """Initial auto-numbered footnote number."""

    self.symbol_footnote_start = 0
    """Initial symbol footnote symbol index."""

    self.id_start = 1
    """Initial ID number."""

    self.parse_messages = []
    """System messages generated while parsing."""

    self.transform_messages = []
    """System messages generated while applying transforms."""

    # Imported here, at call time, rather than at module level.
    import docutils.transforms
    self.transformer = docutils.transforms.Transformer(self)
    """Storage for transforms to be applied to this document."""

    self.decoration = None
    """Document's `decoration` node."""

    # The document is the root of its own tree.
    self.document = self
def __getstate__(self):
    """
    Return dict with unpicklable references removed.

    The result is a copy of the instance dictionary in which the
    'reporter' and 'transformer' entries are set to None.
    """
    picklable = dict(self.__dict__)
    for key in ('reporter', 'transformer'):
        picklable[key] = None
    return picklable
def asdom(self, dom=None):
    """Return a DOM representation of this document."""
    if dom is None:
        import xml.dom.minidom as dom
    tree = dom.Document()
    # The document's own DOM node becomes the child of the DOM root.
    root_element = self._dom_node(tree)
    tree.appendChild(root_element)
    return tree
def set_id(self, node, msgnode=None):
    """
    Register `node` in `self.ids`, creating an ID if it has none.

    For every ID already registered for a *different* node, a "Duplicate
    ID" severe system message is generated and, if `msgnode` is given,
    added to it with ``+=``.

    If `node` has no IDs, one is derived from the first of its names that
    yields a non-empty, unused ID.  Failing that, one is composed from the
    ``id_prefix`` and ``auto_id_prefix`` settings and a running number.
    The new ID is appended to ``node['ids']``.

    Return the registered ID: the new one, or the last entry of
    ``node['ids']`` if the node already had IDs.
    """
    for node_id in node['ids']:
        if node_id in self.ids and self.ids[node_id] is not node:
            msg = self.reporter.severe('Duplicate ID: "%s".' % node_id)
            # Identity test: an empty message container must still
            # receive the message.
            if msgnode is not None:
                msgnode += msg
    if not node['ids']:
        for name in node['names']:
            node_id = self.settings.id_prefix + make_id(name)
            if node_id and node_id not in self.ids:
                break
        else:
            # No usable name: count upwards until an unused ID is found.
            node_id = ''
            while not node_id or node_id in self.ids:
                node_id = (self.settings.id_prefix +
                           self.settings.auto_id_prefix +
                           str(self.id_start))
                self.id_start += 1
        node['ids'].append(node_id)
    self.ids[node_id] = node
    return node_id
def set_name_id_map(self, node, id, msgnode=None, explicit=None):
    """
    `self.nameids` maps names to IDs, while `self.nametypes` maps names to
    booleans representing hyperlink type (True==explicit,
    False==implicit).  This method updates the mappings.

    The following state transition table shows how `self.nameids` ("ids")
    and `self.nametypes` ("types") change with new input (a call to this
    method), and what actions are performed ("implicit"-type system
    messages are INFO/1, and "explicit"-type system messages are ERROR/3):

    ====  =====  ========  ========  =======  ====  =====  =====
     Old State    Input          Action        New State   Notes
    -----------  --------  -----------------  -----------  -----
    ids   types  new type  sys.msg.  dupname  ids   types
    ====  =====  ========  ========  =======  ====  =====  =====
    -     -      explicit  -         -        new   True
    -     -      implicit  -         -        new   False
    None  False  explicit  -         -        new   True
    old   False  explicit  implicit  old      new   True
    None  True   explicit  explicit  new      None  True
    old   True   explicit  explicit  new,old  None  True   [#]_
    None  False  implicit  implicit  new      None  False
    old   False  implicit  implicit  new,old  None  False
    None  True   implicit  implicit  new      None  True
    old   True   implicit  implicit  new      old   True
    ====  =====  ========  ========  =======  ====  =====  =====

    .. [#] Do not clear the name-to-id map or invalidate the old target if
       both old and new targets are external and refer to identical URIs.
       The new target is invalidated regardless.
    """
    for name in node['names']:
        if name not in self.nameids:
            # First occurrence of this name: simply record it.
            self.nameids[name] = id
            self.nametypes[name] = explicit
        else:
            self.set_duplicate_name_id(node, id, name, msgnode, explicit)
def set_duplicate_name_id(self, node, id, name, msgnode, explicit):
    """
    Handle `name` of `node` being already present in `self.nameids`.

    Updates `self.nameids` and `self.nametypes` for `name`, marks the old
    and/or new target via `dupname()` and generates a system message as
    coded below.  The message is added to `msgnode` (with ``+=``) unless
    `msgnode` is None.

    Parameters:

    - `node`: the new target carrying the duplicate name.
    - `id`: the ID of `node`.
    - `name`: the duplicate name.
    - `msgnode`: container for generated system messages, or None.
    - `explicit`: true if `node` is an explicit target.
    """
    old_id = self.nameids[name]
    old_explicit = self.nametypes[name]
    self.nametypes[name] = old_explicit or explicit
    if explicit:
        if old_explicit:
            level = 2
            if old_id is not None:
                old_node = self.ids[old_id]
                if 'refuri' in node:
                    refuri = node['refuri']
                    if old_node['names'] \
                           and 'refuri' in old_node \
                           and old_node['refuri'] == refuri:
                        level = 1   # just inform if refuri's identical
                if level > 1:
                    dupname(old_node, name)
                    self.nameids[name] = None
            msg = self.reporter.system_message(
                level, 'Duplicate explicit target name: "%s".' % name,
                backrefs=[id], base_node=node)
            if msgnode is not None:
                msgnode += msg
            dupname(node, name)
        else:
            # An explicit target takes the name over from an implicit one.
            self.nameids[name] = id
            if old_id is not None:
                old_node = self.ids[old_id]
                dupname(old_node, name)
    else:
        if old_id is not None and not old_explicit:
            self.nameids[name] = None
            old_node = self.ids[old_id]
            dupname(old_node, name)
        dupname(node, name)
    if not explicit or (not old_explicit and old_id is not None):
        msg = self.reporter.info(
            'Duplicate implicit target name: "%s".' % name,
            backrefs=[id], base_node=node)
        if msgnode is not None:
            msgnode += msg
def has_name(self, name):
    """Return true if `name` is a key of `self.nameids`."""
    registered = self.nameids
    return name in registered

# "note" here is an imperative verb: "take note of".
def note_implicit_target(self, target, msgnode=None):
    """
    Give `target` an ID (`set_id()`) and register its names through
    `set_name_id_map()` with ``explicit=None``.
    """
    target_id = self.set_id(target, msgnode)
    self.set_name_id_map(target, target_id, msgnode, explicit=None)

def note_explicit_target(self, target, msgnode=None):
    """
    Give `target` an ID (`set_id()`) and register its names through
    `set_name_id_map()` with ``explicit=True``.
    """
    target_id = self.set_id(target, msgnode)
    self.set_name_id_map(target, target_id, msgnode, explicit=True)

def note_refname(self, node):
    """Append `node` to the `self.refnames` list for ``node['refname']``."""
    referrers = self.refnames.setdefault(node['refname'], [])
    referrers.append(node)

def note_refid(self, node):
    """Append `node` to the `self.refids` list for ``node['refid']``."""
    referrers = self.refids.setdefault(node['refid'], [])
    referrers.append(node)

def note_indirect_target(self, target):
    """
    Append `target` to `self.indirect_targets`; if it has any names, also
    record it with `note_refname()`.
    """
    self.indirect_targets.append(target)
    if not target['names']:
        return
    self.note_refname(target)

def note_anonymous_target(self, target):
    """Give `target` an ID via `set_id()`."""
    self.set_id(target)
def note_autofootnote(self, footnote):
    """Give `footnote` an ID and append it to `self.autofootnotes`."""
    self.set_id(footnote)
    collected = self.autofootnotes
    collected.append(footnote)

def note_autofootnote_ref(self, ref):
    """Give `ref` an ID and append it to `self.autofootnote_refs`."""
    self.set_id(ref)
    collected = self.autofootnote_refs
    collected.append(ref)

def note_symbol_footnote(self, footnote):
    """Give `footnote` an ID and append it to `self.symbol_footnotes`."""
    self.set_id(footnote)
    collected = self.symbol_footnotes
    collected.append(footnote)

def note_symbol_footnote_ref(self, ref):
    """Give `ref` an ID and append it to `self.symbol_footnote_refs`."""
    self.set_id(ref)
    collected = self.symbol_footnote_refs
    collected.append(ref)

def note_footnote(self, footnote):
    """Give `footnote` an ID and append it to `self.footnotes`."""
    self.set_id(footnote)
    collected = self.footnotes
    collected.append(footnote)

def note_footnote_ref(self, ref):
    """
    Give `ref` an ID, file it under ``ref['refname']`` in
    `self.footnote_refs`, and record it with `note_refname()`.
    """
    self.set_id(ref)
    same_name = self.footnote_refs.setdefault(ref['refname'], [])
    same_name.append(ref)
    self.note_refname(ref)

def note_citation(self, citation):
    """Append `citation` to `self.citations` (no ID is assigned here)."""
    collected = self.citations
    collected.append(citation)

def note_citation_ref(self, ref):
    """
    Give `ref` an ID, file it under ``ref['refname']`` in
    `self.citation_refs`, and record it with `note_refname()`.
    """
    self.set_id(ref)
    same_name = self.citation_refs.setdefault(ref['refname'], [])
    same_name.append(ref)
    self.note_refname(ref)
def note_substitution_def(self, subdef, def_name, msgnode=None):
    """
    Register the substitution definition `subdef` under `def_name`.

    The name is whitespace-normalized first.  If a definition with the
    same normalized name exists, an error system message is created
    (and appended to `msgnode`, if given) and the name is withdrawn from
    the old definition with `dupname()`.  The new definition always
    replaces the old one.

    Parameters:

    - `subdef`: the substitution definition node.
    - `def_name`: the definition's name as found in the source.
    - `msgnode`: optional node to which the error message is appended.
    """
    name = whitespace_normalize_name(def_name)
    if name in self.substitution_defs:
        msg = self.reporter.error(
            'Duplicate substitution definition name: "%s".' % name,
            base_node=subdef)
        if msgnode is not None:
            msgnode += msg
        oldnode = self.substitution_defs[name]
        dupname(oldnode, name)
    # keep only the last definition:
    self.substitution_defs[name] = subdef
    # case-insensitive mapping:
    self.substitution_names[fully_normalize_name(name)] = name
def note_substitution_ref(self, subref, refname):
    """Store the whitespace-normalized `refname` in ``subref['refname']``."""
    normalized = whitespace_normalize_name(refname)
    subref['refname'] = normalized

def note_pending(self, pending, priority=None):
    """Hand `pending` and `priority` to ``self.transformer.add_pending()``."""
    transformer = self.transformer
    transformer.add_pending(pending, priority)

def note_parse_message(self, message):
    """Append `message` to `self.parse_messages`."""
    collected = self.parse_messages
    collected.append(message)

def note_transform_message(self, message):
    """Append `message` to `self.transform_messages`."""
    collected = self.transform_messages
    collected.append(message)

def note_source(self, source, offset):
    """
    Remember `source` as `self.current_source` and ``offset + 1`` as
    `self.current_line`; an `offset` of None is stored unchanged.
    """
    self.current_source = source
    self.current_line = None if offset is None else offset + 1
def copy(self):
    """
    Return a new instance of the same class, built from `self.settings`,
    `self.reporter` and the keyword arguments in `self.attributes`.
    """
    cls = self.__class__
    return cls(self.settings, self.reporter, **self.attributes)

def get_decoration(self):
    """
    Return `self.decoration`, creating and inserting it on first use.

    A new `decoration` element is inserted at the index returned by
    ``self.first_child_not_matching_class(Titular)``, or appended if that
    index is None.
    """
    if self.decoration:
        return self.decoration
    self.decoration = decoration()
    position = self.first_child_not_matching_class(Titular)
    if position is None:
        self.append(self.decoration)
    else:
        self.insert(position, self.decoration)
    return self.decoration
# ================
#  Title Elements
# ================

# All classes in this group derive from `Titular`.

class title(Titular, PreBibliographic, TextElement):
    pass


class subtitle(Titular, PreBibliographic, TextElement):
    pass


class rubric(Titular, TextElement):
    pass


# ========================
#  Bibliographic Elements
# ========================

# All classes in this group derive from `Bibliographic`.

class docinfo(Bibliographic, Element):
    pass


class author(Bibliographic, TextElement):
    pass


class authors(Bibliographic, Element):
    pass


class organization(Bibliographic, TextElement):
    pass


class address(Bibliographic, FixedTextElement):
    pass


class contact(Bibliographic, TextElement):
    pass


class version(Bibliographic, TextElement):
    pass


class revision(Bibliographic, TextElement):
    pass


class status(Bibliographic, TextElement):
    pass


class date(Bibliographic, TextElement):
    pass


class copyright(Bibliographic, TextElement):
    pass
# =====================
#  Decorative Elements
# =====================

class decoration(Decorative, Element):

    """
    Element whose accessors create a `header` as first child and a
    `footer` as last child on demand.
    """

    def get_header(self):
        """Return the first child, inserting a `header` there if needed."""
        kids = self.children
        if not (len(kids) and isinstance(kids[0], header)):
            self.insert(0, header())
        return self.children[0]

    def get_footer(self):
        """Return the last child, appending a `footer` if needed."""
        kids = self.children
        if not (len(kids) and isinstance(kids[-1], footer)):
            self.append(footer())
        return self.children[-1]


class header(Decorative, Element):
    pass


class footer(Decorative, Element):
    pass
# =====================
#  Structural Elements
# =====================

class section(Structural, Element):
    pass


class topic(Structural, Element):

    """
    A terminal, "leaf" mini-section, comparable to a block quote with a
    title or to a textual figure.  It is just like a section, except that
    it has no subsections and need not conform to section placement rules.

    Topics may occur wherever body elements (list, table, etc.) may occur,
    but only at the top level of a section or document.  They cannot nest
    inside topics, sidebars, or body elements; a topic inside a table,
    list, block quote, etc. is not possible.
    """


class sidebar(Structural, Element):

    """
    A miniature, parallel document that occurs inside another document
    and provides related or reference material.  A sidebar is typically
    offset by a border and "floats" to the side of the page, so that the
    document's main text may flow around it.  Sidebars can also be likened
    to super-footnotes: their content is outside of the flow of the
    document's main text.

    Sidebars may occur wherever body elements (list, table, etc.) may
    occur, but only at the top level of a section or document.  They
    cannot nest inside sidebars, topics, or body elements; a sidebar
    inside a table, list, block quote, etc. is not possible.
    """


class transition(Structural, Element):
    pass
# ===============
#  Body Elements
# ===============

class paragraph(General, TextElement):
    pass


class compound(General, Element):
    pass


class container(General, Element):
    pass


class bullet_list(Sequential, Element):
    pass


class enumerated_list(Sequential, Element):
    pass


class list_item(Part, Element):
    pass


class definition_list(Sequential, Element):
    pass


class definition_list_item(Part, Element):
    pass


class term(Part, TextElement):
    pass


class classifier(Part, TextElement):
    pass


class definition(Part, Element):
    pass


class field_list(Sequential, Element):
    pass


class field(Part, Element):
    pass


class field_name(Part, TextElement):
    pass


class field_body(Part, Element):
    pass
class option(Part, Element):

    # Children's texts are concatenated without a separator.
    child_text_separator = ''


class option_argument(Part, TextElement):

    def astext(self):
        """
        Return the element's text, preceded by the value of its
        'delimiter' attribute (a single space if the attribute is unset).
        """
        delimiter = self.get('delimiter', ' ')
        return delimiter + TextElement.astext(self)


class option_group(Part, Element):

    child_text_separator = ', '


class option_list(Sequential, Element):
    pass


class option_list_item(Part, Element):

    # NOTE(review): whitespace inside this literal may have been collapsed
    # when the source was extracted -- confirm against upstream.
    child_text_separator = ' '


class option_string(Part, TextElement):
    pass


class description(Part, Element):
    pass


class literal_block(General, FixedTextElement):
    pass


class doctest_block(General, FixedTextElement):
    pass


class math_block(General, FixedTextElement):
    pass


class line_block(General, Element):
    pass


class line(Part, TextElement):

    indent = None
class block_quote(General, Element):
    pass


class attribution(Part, TextElement):
    pass


# Admonitions: every class below up to `admonition` derives from
# `Admonition`.

class attention(Admonition, Element):
    pass


class caution(Admonition, Element):
    pass


class danger(Admonition, Element):
    pass


class error(Admonition, Element):
    pass


class important(Admonition, Element):
    pass


class note(Admonition, Element):
    pass


class tip(Admonition, Element):
    pass


class hint(Admonition, Element):
    pass


class warning(Admonition, Element):
    pass


class admonition(Admonition, Element):
    pass


class comment(Special, Invisible, FixedTextElement):
    pass


class substitution_definition(Special, Invisible, TextElement):
    pass


class target(Special, Invisible, Inline, TextElement, Targetable):
    pass


class footnote(General, BackLinkable, Element, Labeled, Targetable):
    pass


class citation(General, BackLinkable, Element, Labeled, Targetable):
    pass


class label(Part, TextElement):
    pass


class figure(General, Element):
    pass


class caption(Part, TextElement):
    pass


class legend(Part, Element):
    pass


class table(General, Element):
    pass


class tgroup(Part, Element):
    pass


class colspec(Part, Element):
    pass


class thead(Part, Element):
    pass


class tbody(Part, Element):
    pass


class row(Part, Element):
    pass


class entry(Part, Element):
    pass
class system_message(Special, BackLinkable, PreBibliographic, Element):

    """
    System message element.

    Do not instantiate this class directly; use
    ``document.reporter.info/warning/error/severe()`` instead.
    """

    def __init__(self, message=None, *children, **attributes):
        """
        Initialize the element.

        A non-empty `message` is wrapped in a `paragraph` that becomes the
        first child, followed by `children`.  `attributes` are passed on
        to `Element.__init__()`.  If that call raises an exception, the
        children are reported on stderr and the exception is re-raised.
        """
        if message:
            p = paragraph('', message)
            children = (p,) + children
        try:
            Element.__init__(self, '', *children, **attributes)
        except Exception:
            # Debugging aid.  Written to stderr (not printed) so that the
            # diagnostic cannot end up in output written to stdout, and so
            # that the code is valid on both Python 2 and Python 3.
            sys.stderr.write('system_message: children=%r\n' % (children,))
            raise

    def astext(self):
        """
        Return "source:line: (type/level) text".

        'source', 'type' and 'level' must be set (a missing one raises
        the exception of ``self[...]``); a missing 'line' attribute is
        rendered as an empty string.
        """
        line = self.get('line', '')
        return u'%s:%s: (%s/%s) %s' % (self['source'], line, self['type'],
                                       self['level'], Element.astext(self))
class pending(Special, Invisible, Element):

    """
    Placeholder for a pending operation.

    The "pending" element encapsulates a pending operation: the operation
    (transform), the point at which to apply it, and any data it requires.
    Only the pending operation's location within the document is stored in
    the public document tree (by the "pending" object itself); the
    operation and its data are stored in the "pending" object's internal
    instance attributes.

    For example, say you want a table of contents in your reStructuredText
    document.  The easiest way to specify where to put it is from within
    the document, with a directive::

        .. contents::

    But the "contents" directive can't do its work until the entire
    document has been parsed and possibly transformed to some extent.  So
    the directive code leaves a placeholder behind that will trigger the
    second phase of its processing, something like this::

        <pending ...public attributes...> + internal attributes

    Use `document.note_pending()` so that the
    `docutils.transforms.Transformer` stage of processing can run all
    pending transforms.
    """

    def __init__(self, transform, details=None,
                 rawsource='', *children, **attributes):
        Element.__init__(self, rawsource, *children, **attributes)

        self.transform = transform
        """The `docutils.transforms.Transform` class implementing the pending
        operation."""

        self.details = details or {}
        """Detail data (dictionary) required by the pending operation."""

    def pformat(self, indent=' ', level=0):
        """
        Return `Element.pformat()` output followed by a listing of the
        internal attributes: the transform's module and name, and the
        `details` items sorted by key.  Detail values that are nodes (or
        non-empty lists starting with a node) are shown via `pformat()`,
        all other values via `repr()`.
        """
        # NOTE(review): whitespace runs inside the string literals of this
        # method (and in the default of `indent`) may have been collapsed
        # when the source was extracted -- confirm against upstream.

        def node_lines(node):
            # One output line per line of the node's own pformat().
            return ['%9s%s' % ('', text)
                    for text in node.pformat().splitlines()]

        transform = self.transform
        internals = ['.. internal attributes:']
        internals.append(' .transform: %s.%s' % (transform.__module__,
                                                 transform.__name__))
        internals.append(' .details:')
        for key, value in sorted(self.details.items()):
            if isinstance(value, Node):
                internals.append('%7s%s:' % ('', key))
                internals.extend(node_lines(value))
            elif value and isinstance(value, list) \
                     and isinstance(value[0], Node):
                internals.append('%7s%s:' % ('', key))
                for item in value:
                    internals.extend(node_lines(item))
            else:
                internals.append('%7s%s: %r' % ('', key, value))
        prefix = indent * level
        tail = ''.join([(' %s%s\n' % (prefix, text)) for text in internals])
        return Element.pformat(self, indent, level) + tail

    def copy(self):
        """
        Return a new instance of the same class with the same transform,
        details object, raw source, and attributes (children are not
        passed on).
        """
        cls = self.__class__
        return cls(self.transform, self.details, self.rawsource,
                   **self.attributes)
class raw(Special, Inline, PreBibliographic, FixedTextElement):

    """
    Raw data, to be handed to the Writer untouched.
    """
# =================
#  Inline Elements
# =================

# All classes in this group derive from `Inline`.

class emphasis(Inline, TextElement):
    pass


class strong(Inline, TextElement):
    pass


class literal(Inline, TextElement):
    pass


class reference(General, Inline, Referential, TextElement):
    pass


class footnote_reference(Inline, Referential, TextElement):
    pass


class citation_reference(Inline, Referential, TextElement):
    pass


class substitution_reference(Inline, TextElement):
    pass


class title_reference(Inline, TextElement):
    pass


class abbreviation(Inline, TextElement):
    pass


class acronym(Inline, TextElement):
    pass


class superscript(Inline, TextElement):
    pass


class subscript(Inline, TextElement):
    pass


class math(Inline, TextElement):
    pass


class image(General, Inline, Element):

    def astext(self):
        """Return the 'alt' attribute, or an empty string if it is unset."""
        alternative = self.get('alt', '')
        return alternative


class inline(Inline, TextElement):
    pass


class problematic(Inline, TextElement):
    pass


class generated(Inline, TextElement):
    pass
# ========================================
#  Auxiliary Classes, Functions, and Data
# ========================================

# One initial letter per (possibly wrapped) group; the string is split on
# whitespace, so the layout itself carries no meaning.
node_class_names = """
    Text
    abbreviation acronym address admonition attention attribution author
        authors
    block_quote bullet_list
    caption caution citation citation_reference classifier colspec comment
        compound contact container copyright
    danger date decoration definition definition_list definition_list_item
        description docinfo doctest_block document
    emphasis entry enumerated_list error
    field field_body field_list field_name figure footer
        footnote footnote_reference
    generated
    header hint
    image important inline
    label legend line line_block list_item literal literal_block
    math math_block
    note
    option option_argument option_group option_list option_list_item
        option_string organization
    paragraph pending problematic
    raw reference revision row rubric
    section sidebar status strong subscript substitution_definition
        substitution_reference subtitle superscript system_message
    table target tbody term tgroup thead tip title title_reference topic
        transition
    version
    warning""".split()
"""A list of names of all concrete Node subclasses."""
class NodeVisitor:

    """
    "Visitor" pattern [GoF95]_ abstract superclass implementation for
    document tree traversals.

    Each node class has corresponding methods, doing nothing by
    default; override individual methods for specific and useful
    behaviour.  The `dispatch_visit()` method is called by
    `Node.walk()` upon entering a node.  `Node.walkabout()` also calls
    the `dispatch_departure()` method before exiting a node.

    The dispatch methods call "``visit_`` + node class name" or
    "``depart_`` + node class name", resp.

    This is a base class for visitors whose ``visit_...`` & ``depart_...``
    methods should be implemented for *all* node types encountered (such as
    for `docutils.writers.Writer` subclasses).  Unimplemented methods will
    raise exceptions.

    For sparse traversals, where only certain node types are of interest,
    subclass `SparseNodeVisitor` instead.  When (mostly or entirely) uniform
    processing is desired, subclass `GenericNodeVisitor`.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    optional = ()
    """
    Tuple containing node class names (as strings).

    No exception will be raised if writers do not implement visit
    or departure functions for these node classes.

    Used to ensure transitional compatibility with existing 3rd-party writers.
    """

    def __init__(self, document):
        self.document = document

    def dispatch_visit(self, node):
        """
        Call self."``visit_`` + node class name" with `node` as
        parameter.  If the ``visit_...`` method does not exist, call
        self.unknown_visit.  The call is announced on the reporter's
        debug channel first; the handler's result is returned.
        """
        class_name = node.__class__.__name__
        handler = getattr(self, 'visit_' + class_name, self.unknown_visit)
        announcement = (
            'docutils.nodes.NodeVisitor.dispatch_visit calling %s for %s'
            % (handler.__name__, class_name))
        self.document.reporter.debug(announcement)
        return handler(node)

    def dispatch_departure(self, node):
        """
        Call self."``depart_`` + node class name" with `node` as
        parameter.  If the ``depart_...`` method does not exist, call
        self.unknown_departure.  The call is announced on the reporter's
        debug channel first; the handler's result is returned.
        """
        class_name = node.__class__.__name__
        handler = getattr(self, 'depart_' + class_name,
                          self.unknown_departure)
        announcement = (
            'docutils.nodes.NodeVisitor.dispatch_departure calling %s for %s'
            % (handler.__name__, class_name))
        self.document.reporter.debug(announcement)
        return handler(node)

    def unknown_visit(self, node):
        """
        Called when entering unknown `Node` types.

        Raise `NotImplementedError` unless overridden; node classes listed
        in `optional` are tolerated when the ``strict_visitor`` setting is
        false.
        """
        if (not self.document.settings.strict_visitor
            and node.__class__.__name__ in self.optional):
            return
        raise NotImplementedError(
            '%s visiting unknown node type: %s'
            % (self.__class__, node.__class__.__name__))

    def unknown_departure(self, node):
        """
        Called before exiting unknown `Node` types.

        Raise `NotImplementedError` unless overridden; node classes listed
        in `optional` are tolerated when the ``strict_visitor`` setting is
        false.
        """
        if (not self.document.settings.strict_visitor
            and node.__class__.__name__ in self.optional):
            return
        raise NotImplementedError(
            '%s departing unknown node type: %s'
            % (self.__class__, node.__class__.__name__))
class SparseNodeVisitor(NodeVisitor):

    """
    Base class for sparse traversals, in which only certain node types are
    of interest.  If ``visit_...`` & ``depart_...`` methods should be
    implemented for *all* node types (such as for `docutils.writers.Writer`
    subclasses), subclass `NodeVisitor` instead.
    """
class GenericNodeVisitor(NodeVisitor):

    """
    Generic "Visitor" abstract superclass, for simple traversals.

    Unless overridden, each ``visit_...`` method calls `default_visit()`,
    and each ``depart_...`` method (when using `Node.walkabout()`) calls
    `default_departure()`.  `default_visit()` (and `default_departure()`)
    must be overridden in subclasses.

    Define fully generic visitors by overriding `default_visit()` (and
    `default_departure()`) only.  Define semi-generic visitors by
    overriding individual ``visit_...()`` (and ``depart_...()``) methods
    also.

    `NodeVisitor.unknown_visit()` (`NodeVisitor.unknown_departure()`)
    should be overridden for default behavior.
    """

    def default_visit(self, node):
        """Override for generic, uniform traversals."""
        raise NotImplementedError

    def default_departure(self, node):
        """Override for generic, uniform traversals."""
        raise NotImplementedError
def _call_default_visit(self, node):
    """Forward to ``self.default_visit(node)``; the result is discarded."""
    self.default_visit(node)

def _call_default_departure(self, node):
    """Forward to ``self.default_departure(node)``; the result is discarded."""
    self.default_departure(node)

def _nop(self, node):
    """Do nothing."""
    pass

def _add_node_class_names(names):
    """Save typing with dynamic assignments:"""
    # (visitor class, visit handler, departure handler)
    handler_table = (
        (GenericNodeVisitor, _call_default_visit, _call_default_departure),
        (SparseNodeVisitor, _nop, _nop),
    )
    for _name in names:
        for visitor_class, on_visit, on_departure in handler_table:
            setattr(visitor_class, "visit_" + _name, on_visit)
            setattr(visitor_class, "depart_" + _name, on_departure)

_add_node_class_names(node_class_names)
class TreeCopyVisitor(GenericNodeVisitor):

    """
    Make a complete copy of a tree or branch, including element attributes.
    """

    def __init__(self, document):
        GenericNodeVisitor.__init__(self, document)
        # `parent` starts out as a plain list that receives the copy of
        # the root; `parent_stack` holds the enclosing acting parents.
        self.parent_stack = []
        self.parent = []

    def get_tree_copy(self):
        """Return the first item of the acting parent."""
        return self.parent[0]

    def default_visit(self, node):
        """Copy the current node, and make it the new acting parent."""
        duplicate = node.copy()
        container = self.parent
        container.append(duplicate)
        self.parent_stack.append(container)
        self.parent = duplicate

    def default_departure(self, node):
        """Restore the previous acting parent."""
        self.parent = self.parent_stack.pop()
class TreePruningException(Exception):

    """
    Base class for `NodeVisitor`-related tree pruning exceptions.

    Subclasses are raised from within ``visit_...`` or ``depart_...``
    methods called from `Node.walk()` and `Node.walkabout()` tree
    traversals, in order to prune the tree traversed.
    """


class SkipChildren(TreePruningException):

    """
    Do not visit any children of the current node.  The current node's
    siblings and its ``depart_...`` method are not affected.
    """


class SkipSiblings(TreePruningException):

    """
    Do not visit any more siblings (to the right) of the current node.
    The current node's children and its ``depart_...`` method are not
    affected.
    """


class SkipNode(TreePruningException):

    """
    Do not visit the current node's children, and do not call the current
    node's ``depart_...`` method.
    """


class SkipDeparture(TreePruningException):

    """
    Do not call the current node's ``depart_...`` method.  The current
    node's children and siblings are not affected.
    """


class NodeFound(TreePruningException):

    """
    Raise to indicate that the target of a search has been found.  This
    exception must be caught by the client; it is not caught by the
    traversal code.
    """


class StopTraversal(TreePruningException):

    """
    Stop the traversal altogether.  The current node's ``depart_...``
    method is not affected.  The parent nodes' ``depart_...`` methods are
    also called as usual.  No other nodes are visited.  This is an
    alternative to NodeFound that does not cause exception handling to
    trickle up to the caller.
    """
def make_id(string):
    """
    Convert `string` into an identifier and return it.

    Docutils identifiers will conform to the regular expression
    ``[a-z](-?[a-z0-9]+)*``.  For CSS compatibility, identifiers (the "class"
    and "id" attributes) should have no underscores, colons, or periods.
    Hyphens may be used.

    - The `HTML 4.01 spec`_ defines identifiers based on SGML tokens:

          ID and NAME tokens must begin with a letter ([A-Za-z]) and may be
          followed by any number of letters, digits ([0-9]), hyphens ("-"),
          underscores ("_"), colons (":"), and periods (".").

    - However the `CSS1 spec`_ defines identifiers based on the "name" token,
      a tighter interpretation ("flex" tokenizer notation; "latin1" and
      "escape" 8-bit characters have been replaced with entities)::

          unicode     \\[0-9a-f]{1,4}
          latin1      [&iexcl;-&yuml;]
          escape      {unicode}|\\[ -~&iexcl;-&yuml;]
          nmchar      [-a-z0-9]|{latin1}|{escape}
          name        {nmchar}+

    The CSS1 "nmchar" rule does not include underscores ("_"), colons (":"),
    or periods ("."), therefore "class" and "id" attributes should not contain
    these characters. They should be replaced with hyphens ("-"). Combined
    with HTML's requirements (the first character must be a letter; no
    "unicode", "latin1", or "escape" characters), this results in the
    ``[a-z](-?[a-z0-9]+)*`` pattern.

    The result may be an empty string, e.g. if `string` contains no ASCII
    letters.

    .. _HTML 4.01 spec: http://www.w3.org/TR/html401
    .. _CSS1 spec: http://www.w3.org/TR/REC-CSS1
    """
    text = string.lower()
    # `type(u'')` is the text type of the running interpreter (`unicode`
    # on Python 2); byte strings are decoded with the default encoding.
    if not isinstance(text, type(u'')):
        text = text.decode()
    # Characters without a usable decomposition are mapped by hand:
    # digraphs/ligatures first, then single letters.
    text = text.translate(_non_id_translate_digraphs)
    text = text.translate(_non_id_translate)
    # get rid of non-ascii characters.
    # (the original author notes that an 'ascii' lowercase result prevents
    # problems with the turkish locale)
    text = unicodedata.normalize('NFKD', text)
    text = text.encode('ascii', 'ignore').decode('ascii')
    # shrink runs of whitespace and replace by hyphen
    text = _non_id_chars.sub('-', ' '.join(text.split()))
    # strip leading hyphens/digits and trailing hyphens
    text = _non_id_at_ends.sub('', text)
    return str(text)

# Runs of characters that are not allowed in an identifier.
_non_id_chars = re.compile('[^a-z0-9]+')
# Leading hyphens and digits, or trailing hyphens.
_non_id_at_ends = re.compile('^[-0-9]+|-+$')
# Code point -> replacement for single letters.
_non_id_translate = {
    0x00f8: u'o',       # o with stroke
    0x0111: u'd',       # d with stroke
    0x0127: u'h',       # h with stroke
    0x0131: u'i',       # dotless i
    0x0142: u'l',       # l with stroke
    0x0167: u't',       # t with stroke
    0x0180: u'b',       # b with stroke
    0x0183: u'b',       # b with topbar
    0x0188: u'c',       # c with hook
    0x018c: u'd',       # d with topbar
    0x0192: u'f',       # f with hook
    0x0199: u'k',       # k with hook
    0x019a: u'l',       # l with bar
    0x019e: u'n',       # n with long right leg
    0x01a5: u'p',       # p with hook
    0x01ab: u't',       # t with palatal hook
    0x01ad: u't',       # t with hook
    0x01b4: u'y',       # y with hook
    0x01b6: u'z',       # z with stroke
    0x01e5: u'g',       # g with stroke
    0x0225: u'z',       # z with hook
    0x0234: u'l',       # l with curl
    0x0235: u'n',       # n with curl
    0x0236: u't',       # t with curl
    0x0237: u'j',       # dotless j
    0x023c: u'c',       # c with stroke
    0x023f: u's',       # s with swash tail
    0x0240: u'z',       # z with swash tail
    0x0247: u'e',       # e with stroke
    0x0249: u'j',       # j with stroke
    0x024b: u'q',       # q with hook tail
    0x024d: u'r',       # r with stroke
    0x024f: u'y',       # y with stroke
}
# Code point -> two-letter replacement for digraphs and ligatures.
_non_id_translate_digraphs = {
    0x00df: u'sz',      # ligature sz
    0x00e6: u'ae',      # ae
    0x0153: u'oe',      # ligature oe
    0x0238: u'db',      # db digraph
    0x0239: u'qp',      # qp digraph
}
def dupname(node, name):
    """
    Mark `name` as a duplicate name of `node`: append it to
    ``node['dupnames']``, remove it from ``node['names']``, and flag the
    node as referenced.
    """
    duplicates = node['dupnames']
    duplicates.append(name)
    valid_names = node['names']
    valid_names.remove(name)
    # Assume that this method is referenced, even though it isn't; we
    # don't want to throw unnecessary system_messages.
    node.referenced = 1
def fully_normalize_name(name):
    """Return a case- and whitespace-normalized name."""
    words = name.lower().split()
    return ' '.join(words)

def whitespace_normalize_name(name):
    """Return a whitespace-normalized name."""
    words = name.split()
    return ' '.join(words)
def serial_escape(value):
    """Escape string values that are elements of a list, for serialization."""
    # Backslashes must be doubled first, so that the backslashes added in
    # front of spaces are not doubled again.
    doubled = value.replace('\\', r'\\')
    return doubled.replace(' ', r'\ ')

def pseudo_quoteattr(value):
    """Quote attributes for pseudo-xml"""
    # The value is wrapped in double quotes only; nothing is escaped.
    quoted = '"%s"' % value
    return quoted
1942 # \f
1944 # Local Variables:
1945 # indent-tabs-mode: nil
1946 # sentence-end-double-space: t
1947 # fill-column: 78
1948 # End: