Add cross link to "code" role.
[docutils.git] / docutils / nodes.py
blobf88de28ab68b5c495128c696a7d885e2812499c4
1 # $Id$
2 # Author: David Goodger <goodger@python.org>
3 # Maintainer: docutils-develop@lists.sourceforge.net
4 # Copyright: This module has been placed in the public domain.
6 """
7 Docutils document tree element class library.
9 Classes in CamelCase are abstract base classes or auxiliary classes. The one
10 exception is `Text`, for a text (PCDATA) node; uppercase is used to
11 differentiate from element classes. Classes in lower_case_with_underscores
12 are element classes, matching the XML element generic identifiers in the DTD_.
14 The position of each node (the level at which it can occur) is significant and
15 is represented by abstract base classes (`Root`, `Structural`, `Body`,
16 `Inline`, etc.). Certain transformations will be easier because we can use
17 ``isinstance(node, base_class)`` to determine the position of the node in the
18 hierarchy.
20 .. _DTD: http://docutils.sourceforge.net/docs/ref/docutils.dtd
21 """
23 __docformat__ = 'reStructuredText'
25 import sys
26 import os
27 import re
28 import warnings
29 import types
30 import unicodedata
32 # ==============================
33 # Functional Node Base Classes
34 # ==============================
class Node(object):

    """Abstract base class of nodes in a document tree."""

    parent = None
    """Back-reference to the Node immediately containing this Node."""

    document = None
    """The `document` node at the root of the tree containing this Node."""

    source = None
    """Path or description of the input source which generated this Node."""

    line = None
    """The line number (1-based) of the beginning of this Node in `source`."""

    def __nonzero__(self):
        """
        Report every Node as true, whether or not it has subnodes.

        A node is more than a simple container, so its truth value must
        not depend on the number of children.  Use `len()` to check node
        length and `None` to represent a boolean false value.
        """
        return True

    if sys.version_info < (3,):
        # Python 2 only: str(node) is a byte string in which Unicode
        # characters > 255 are escaped.  Not necessary on 3.x.
        def __str__(self):
            return unicode(self).encode('raw_unicode_escape')

    def asdom(self, dom=None):
        """
        Return a DOM **fragment** representation of this Node.

        `dom` is the DOM implementation module to use; it defaults to
        `xml.dom.minidom`, imported on demand.
        """
        if dom is None:
            import xml.dom.minidom as dom
        return self._dom_node(dom.Document())

    def pformat(self, indent=' ', level=0):
        """
        Return an indented pseudo-XML representation, for test purposes.

        Abstract: subclasses must override.
        """
        raise NotImplementedError

    def copy(self):
        """Return a copy of self.  Abstract: subclasses must override."""
        raise NotImplementedError

    def deepcopy(self):
        """
        Return a deep copy of self (also copying children).

        Abstract: subclasses must override.
        """
        raise NotImplementedError

    def setup_child(self, child):
        """
        Attach `child` to this node.

        Sets ``child.parent``.  If this node belongs to a document, the
        child inherits it, and a missing ``source`` / ``line`` on the
        child is filled in from the document's current position.
        """
        child.parent = self
        owner = self.document
        if not owner:
            return
        child.document = owner
        if child.source is None:
            child.source = owner.current_source
        if child.line is None:
            child.line = owner.current_line

    def walk(self, visitor):
        """
        Traverse a tree of `Node` objects, calling the
        `dispatch_visit()` method of `visitor` when entering each
        node.  (`walkabout()` is similar, but additionally calls
        `dispatch_departure()` before exiting each node.)

        Limited in-place tree modification is supported during the
        traversal: replacing one node with one or more nodes is OK, as
        is removing an element.  However, if the node removed or
        replaced occurs after the current node, the old node will still
        be traversed, and any new nodes will not.

        Within ``visit`` methods, `TreePruningException` subclasses may
        be raised (`SkipChildren`, `SkipSiblings`, `SkipNode`,
        `SkipDeparture`).

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` implementation for each `Node` subclass encountered.

        Return true if we should stop the traversal.
        """
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walk calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except (SkipChildren, SkipNode):
                return False
            except SkipDeparture:
                # There is no departure step in walk(); nothing to skip.
                pass
            # Read the child list only after the visit, and iterate over
            # a copy, so that the visitor may modify the tree.
            try:
                for child in self.children[:]:
                    if child.walk(visitor):
                        return True
            except SkipSiblings:
                pass
        except StopTraversal:
            return True
        return False

    def walkabout(self, visitor):
        """
        Perform a tree traversal similarly to `Node.walk()` (which
        see), except also call the `dispatch_departure()` method
        before exiting each node.

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` and ``depart`` implementation for each `Node`
        subclass encountered.

        Return true if we should stop the traversal.
        """
        halted = False
        depart = True
        class_name = self.__class__.__name__
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walkabout calling dispatch_visit for %s'
            % class_name)
        try:
            try:
                visitor.dispatch_visit(self)
            except SkipNode:
                return halted
            except SkipDeparture:
                depart = False
            # Iterate over a copy taken after the visit (see `walk()`).
            try:
                for child in self.children[:]:
                    if child.walkabout(visitor):
                        halted = True
                        break
            except SkipSiblings:
                pass
        except SkipChildren:
            pass
        except StopTraversal:
            halted = True
        if depart:
            visitor.document.reporter.debug(
                'docutils.nodes.Node.walkabout calling dispatch_departure '
                'for %s' % class_name)
            visitor.dispatch_departure(self)
        return halted

    def _fast_traverse(self, cls):
        """Specialized traverse() that only supports instance checks."""
        if isinstance(self, cls):
            matches = [self]
        else:
            matches = []
        for child in self.children:
            matches += child._fast_traverse(cls)
        return matches

    def _all_traverse(self):
        """Specialized traverse() that doesn't check for a condition."""
        nodes = [self]
        for child in self.children:
            nodes += child._all_traverse()
        return nodes

    def traverse(self, condition=None, include_self=True, descend=True,
                 siblings=False, ascend=False):
        """
        Return an iterable containing

        * self (if include_self is true)
        * all descendants in tree traversal order (if descend is true)
        * all siblings (if siblings is true) and their descendants (if
          also descend is true)
        * the siblings of the parent (if ascend is true) and their
          descendants (if also descend is true), and so on

        If `condition` is not None, the iterable contains only nodes
        for which ``condition(node)`` is true.  If `condition` is a
        node class ``cls``, it is equivalent to a function consisting
        of ``return isinstance(node, cls)``.

        If ascend is true, assume siblings to be true as well.

        For example, given the following tree::

            <paragraph>
                <emphasis>      <--- emphasis.traverse() and
                    <strong>    <--- strong.traverse() are called.
                        Foo
                    Bar
                <reference name="Baz" refid="baz">
                    Baz

        Then list(emphasis.traverse()) equals ::

            [<emphasis>, <strong>, <#text: Foo>, <#text: Bar>]

        and list(strong.traverse(ascend=True)) equals ::

            [<strong>, <#text: Foo>, <#text: Bar>, <reference>, <#text: Baz>]
        """
        if ascend:
            siblings = True
        # Some argument combinations can be served by an optimized
        # version of traverse().
        if include_self and descend and not siblings:
            if condition is None:
                return self._all_traverse()
            elif isinstance(condition, (types.ClassType, type)):
                return self._fast_traverse(condition)
        # Turn a class `condition` into a predicate (`type` is checked
        # as well for Python implementations that use only new-style
        # classes, like PyPy).
        if isinstance(condition, (types.ClassType, type)):
            node_class = condition
            def condition(node, node_class=node_class):
                return isinstance(node, node_class)
        found = []
        if include_self and (condition is None or condition(self)):
            found.append(self)
        if descend and len(self.children):
            for child in self:
                found.extend(child.traverse(condition=condition,
                                            include_self=True, descend=True,
                                            siblings=False, ascend=False))
        if siblings:
            # `ascend` implies `siblings` (see above).
            node = self
            while node.parent:
                container = node.parent
                position = container.index(node)
                for sibling in container[position+1:]:
                    found.extend(sibling.traverse(condition=condition,
                                                  include_self=True,
                                                  descend=descend,
                                                  siblings=False,
                                                  ascend=False))
                if not ascend:
                    break
                node = container
        return found

    def next_node(self, condition=None, include_self=False, descend=True,
                  siblings=False, ascend=False):
        """
        Return the first node in the iterable returned by traverse(),
        or None if the iterable is empty.

        Parameter list is the same as of traverse.  Note that
        include_self defaults to False, though.
        """
        candidates = self.traverse(condition=condition,
                                   include_self=include_self,
                                   descend=descend,
                                   siblings=siblings, ascend=ascend)
        try:
            first = candidates[0]
        except IndexError:
            first = None
        return first
if sys.version_info >= (3,):
    # No ``u`` prefix to strip on Python 3: use the text type directly.
    reprunicode = unicode
else:
    class reprunicode(unicode):
        """
        A class that removes the initial u from unicode's repr.
        """

        def __repr__(self):
            # Drop the leading ``u`` of e.g. ``u'text'``.
            prefixed = unicode.__repr__(self)
            return prefixed[1:]
class Text(Node, reprunicode):

    """
    Instances are terminal nodes (leaves) containing text only; no child
    nodes or attributes.  Initialize by passing a string to the constructor.
    Access the text itself with the `astext` method.
    """

    tagname = '#text'

    children = ()
    """Text nodes have no children, and cannot have children."""

    if sys.version_info > (3,):
        def __new__(cls, data, rawsource=None):
            """Prevent the rawsource argument from propagating to str."""
            if isinstance(data, bytes):
                raise TypeError('expecting str data, not bytes')
            return reprunicode.__new__(cls, data)
    else:
        def __new__(cls, data, rawsource=None):
            """Prevent the rawsource argument from propagating to str."""
            return reprunicode.__new__(cls, data)

    def __init__(self, data, rawsource=''):
        # `data` has already been consumed by `__new__`.
        self.rawsource = rawsource
        """The raw text from which this element was constructed."""

    def shortrepr(self, maxlen=18):
        """
        Return ``<#text: '...'>``, abbreviating the text with a trailing
        `` ...`` when it is longer than `maxlen` characters.
        """
        text = self
        if len(text) > maxlen:
            text = text[:maxlen-4] + ' ...'
        return '<%s: %s>' % (self.tagname, repr(reprunicode(text)))

    def __repr__(self):
        return self.shortrepr(maxlen=68)

    def _dom_node(self, domroot):
        """Return a DOM text node created by `domroot` for this text."""
        return domroot.createTextNode(unicode(self))

    def astext(self):
        """Return the text as a plain `reprunicode` string."""
        return reprunicode(self)

    # Note about __unicode__: The implementation of __unicode__ here,
    # and the one raising NotImplemented in the superclass Node had
    # to be removed when changing Text to a subclass of unicode instead
    # of UserString, since there is no way to delegate the __unicode__
    # call to the superclass unicode:
    # unicode itself does not have __unicode__ method to delegate to
    # and calling unicode(self) or unicode.__new__ directly creates
    # an infinite loop

    def copy(self):
        """Return a new node of the same class, text and `rawsource`."""
        text = reprunicode(self)
        return self.__class__(text, rawsource=self.rawsource)

    def deepcopy(self):
        """Same as `copy()`: Text nodes have no children to copy."""
        return self.copy()

    def pformat(self, indent=' ', level=0):
        """
        Return the text with every line prefixed by `level` repetitions
        of `indent` and terminated by a newline.
        """
        prefix = indent * level
        return ''.join([prefix + line + '\n' for line in self.splitlines()])

    # rstrip and lstrip are used by substitution definitions where
    # they are expected to return a Text instance, this was formerly
    # taken care of by UserString. Note that then and now the
    # rawsource member is lost.

    def rstrip(self, chars=None):
        stripped = reprunicode.rstrip(self, chars)
        return self.__class__(stripped)

    def lstrip(self, chars=None):
        stripped = reprunicode.lstrip(self, chars)
        return self.__class__(stripped)
class Element(Node):

    """
    `Element` is the superclass to all specific elements.

    Elements contain attributes and child nodes.  Elements emulate
    dictionaries for attributes, indexing by attribute name (a string).  To
    set the attribute 'att' to 'value', do::

        element['att'] = 'value'

    There are two special attributes: 'ids' and 'names'.  Both are
    lists of unique identifiers, and names serve as human interfaces
    to IDs.  Names are case- and whitespace-normalized (see the
    fully_normalize_name() function), and IDs conform to the regular
    expression ``[a-z](-?[a-z0-9]+)*`` (see the make_id() function).

    Elements also emulate lists for child nodes (element nodes and/or text
    nodes), indexing by integer.  To get the first child node, use::

        element[0]

    The ``in`` operator tests attribute names when given a string and
    child nodes otherwise.

    Elements may be constructed using the ``+=`` operator.  To add one new
    child node to element, do::

        element += node

    This is equivalent to ``element.append(node)``.

    To add a list of multiple child nodes at once, use the same ``+=``
    operator::

        element += [node1, node2]

    This is equivalent to ``element.extend([node1, node2])``.
    """

    list_attributes = ('ids', 'classes', 'names', 'dupnames', 'backrefs')
    """List attributes, automatically initialized to empty lists for
    all nodes."""

    tagname = None
    """The element generic identifier. If None, it is set as an instance
    attribute to the name of the class."""

    child_text_separator = '\n\n'
    """Separator for child nodes, used by `astext()` method."""

    def __init__(self, rawsource='', *children, **attributes):
        """
        Initialize the element.

        - `rawsource`: the raw text from which the element was built.
        - `children`: child nodes, appended in order.
        - `attributes`: attribute values; names are lower-cased and
          values of list attributes are copied.
        """
        self.rawsource = rawsource
        """The raw text from which this element was constructed."""

        self.children = []
        """List of child nodes (elements and/or `Text`)."""

        self.extend(children)           # maintain parent info

        self.attributes = {}
        """Dictionary of attribute {name: value}."""

        # Initialize list attributes.
        for att in self.list_attributes:
            self.attributes[att] = []

        for att, value in attributes.items():
            att = att.lower()
            if att in self.list_attributes:
                # mutable list; make a copy for this node
                self.attributes[att] = value[:]
            else:
                self.attributes[att] = value

        if self.tagname is None:
            self.tagname = self.__class__.__name__

    def _dom_node(self, domroot):
        """Return a DOM element (with attributes and children) for self."""
        element = domroot.createElement(self.tagname)
        for attribute, value in self.attlist():
            if isinstance(value, list):
                # List values are serialized as space-separated items.
                value = ' '.join([serial_escape('%s' % (v,)) for v in value])
            element.setAttribute(attribute, '%s' % value)
        for child in self.children:
            element.appendChild(child._dom_node(domroot))
        return element

    def __repr__(self):
        data = ''
        for c in self.children:
            data += c.shortrepr()
            if len(data) > 60:
                # Abbreviate long content.
                data = data[:56] + ' ...'
                break
        if self['names']:
            return '<%s "%s": %s>' % (self.__class__.__name__,
                                      '; '.join(self['names']), data)
        else:
            return '<%s: %s>' % (self.__class__.__name__, data)

    def shortrepr(self):
        """Return a one-tag summary that omits the children."""
        if self['names']:
            return '<%s "%s"...>' % (self.__class__.__name__,
                                     '; '.join(self['names']))
        else:
            return '<%s...>' % self.tagname

    def __unicode__(self):
        if self.children:
            return u'%s%s%s' % (self.starttag(),
                                ''.join([unicode(c) for c in self.children]),
                                self.endtag())
        else:
            return self.emptytag()

    if sys.version_info > (3,):
        # 2to3 doesn't convert __unicode__ to __str__
        __str__ = __unicode__

    def starttag(self, quoteattr=None):
        """
        Return the start tag, including all non-default attributes.

        `quoteattr` is the function used to quote attribute values; it
        defaults to `pseudo_quoteattr`.
        """
        # the optional arg is used by the docutils_xml writer
        if quoteattr is None:
            quoteattr = pseudo_quoteattr
        parts = [self.tagname]
        for name, value in self.attlist():
            if value is None:           # boolean attribute
                parts.append(name)
                continue
            if isinstance(value, list):
                values = [serial_escape('%s' % (v,)) for v in value]
                value = ' '.join(values)
            else:
                value = unicode(value)
            value = quoteattr(value)
            parts.append(u'%s=%s' % (name, value))
        return u'<%s>' % u' '.join(parts)

    def endtag(self):
        """Return the end tag."""
        return '</%s>' % self.tagname

    def emptytag(self):
        """Return an empty-element tag with all non-default attributes."""
        return u'<%s/>' % u' '.join([self.tagname] +
                                    ['%s="%s"' % (n, v)
                                     for n, v in self.attlist()])

    def __len__(self):
        return len(self.children)

    def __contains__(self, key):
        # support both membership test for children and attributes
        # (has_key is translated to "in" by 2to3)
        if isinstance(key, basestring):
            return key in self.attributes
        return key in self.children

    def __getitem__(self, key):
        """
        Return an attribute value (string key), a child node (integer
        key), or a list of children (slice without stride).
        """
        if isinstance(key, basestring):
            return self.attributes[key]
        elif isinstance(key, int):
            return self.children[key]
        elif isinstance(key, slice):
            assert key.step in (None, 1), 'cannot handle slice with stride'
            return self.children[key.start:key.stop]
        else:
            raise TypeError('element index must be an integer, a slice, or '
                            'an attribute name string')

    def __setitem__(self, key, item):
        """
        Set an attribute (string key), replace a child (integer key), or
        replace a range of children (slice without stride).  New children
        are attached with `setup_child()`.
        """
        if isinstance(key, basestring):
            self.attributes[str(key)] = item
        elif isinstance(key, int):
            self.setup_child(item)
            self.children[key] = item
        elif isinstance(key, slice):
            assert key.step in (None, 1), 'cannot handle slice with stride'
            for node in item:
                self.setup_child(node)
            self.children[key.start:key.stop] = item
        else:
            raise TypeError('element index must be an integer, a slice, or '
                            'an attribute name string')

    def __delitem__(self, key):
        """
        Delete an attribute (string key), a child (integer key), or a
        range of children (slice without stride).
        """
        if isinstance(key, basestring):
            del self.attributes[key]
        elif isinstance(key, int):
            del self.children[key]
        elif isinstance(key, slice):
            assert key.step in (None, 1), 'cannot handle slice with stride'
            del self.children[key.start:key.stop]
        else:
            raise TypeError('element index must be an integer, a simple '
                            'slice, or an attribute name string')

    def __add__(self, other):
        return self.children + other

    def __radd__(self, other):
        return other + self.children

    def __iadd__(self, other):
        """Append a node or a list of nodes to `self.children`."""
        if isinstance(other, Node):
            self.append(other)
        elif other is not None:
            self.extend(other)
        return self

    def astext(self):
        """Return the children's text joined by `child_text_separator`."""
        return self.child_text_separator.join(
              [child.astext() for child in self.children])

    def non_default_attributes(self):
        """Return a dict of the attributes that `is_not_default()`."""
        atts = {}
        for key, value in self.attributes.items():
            if self.is_not_default(key):
                atts[key] = value
        return atts

    def attlist(self):
        """Return the non-default attributes as a sorted list of pairs."""
        return sorted(self.non_default_attributes().items())

    def get(self, key, failobj=None):
        """Return attribute `key`, or `failobj` if it is not set."""
        return self.attributes.get(key, failobj)

    def hasattr(self, attr):
        """Return true if attribute `attr` is set."""
        return attr in self.attributes

    def delattr(self, attr):
        """Delete attribute `attr` if it is set; otherwise do nothing."""
        if attr in self.attributes:
            del self.attributes[attr]

    def setdefault(self, key, failobj=None):
        """Return attribute `key`, setting it to `failobj` if missing."""
        return self.attributes.setdefault(key, failobj)

    has_key = hasattr

    # The ``in`` operator is implemented by `__contains__()` above, which
    # handles attribute names and child nodes.  It must not be rebound to
    # `hasattr` here: that would discard the child-membership test.

    def get_language_code(self, fallback=''):
        """Return node's language tag.

        Look iteratively in self and parents for a class argument
        starting with ``language-`` and return the remainder of it
        (which should be a `BCP 47` language tag) or the `fallback`.
        """
        for cls in self.get('classes', []):
            if cls.startswith('language-'):
                return cls[9:]
        try:
            return self.parent.get_language_code(fallback)
        except AttributeError:
            # No parent (or a parent without this method): give up.
            return fallback

    def append(self, item):
        """Attach `item` as the last child."""
        self.setup_child(item)
        self.children.append(item)

    def extend(self, item):
        """Append every node of the iterable `item`."""
        for node in item:
            self.append(node)

    def insert(self, index, item):
        """Insert a node, or a sequence of nodes, before `index`."""
        if isinstance(item, Node):
            self.setup_child(item)
            self.children.insert(index, item)
        elif item is not None:
            self[index:index] = item

    def pop(self, i=-1):
        return self.children.pop(i)

    def remove(self, item):
        self.children.remove(item)

    def index(self, item):
        return self.children.index(item)

    def is_not_default(self, key):
        """
        Return 0 if attribute `key` is a list attribute holding an empty
        list, 1 otherwise.
        """
        if self[key] == [] and key in self.list_attributes:
            return 0
        else:
            return 1

    def update_basic_atts(self, dict):
        """
        Update basic attributes ('ids', 'names', 'classes',
        'dupnames', but not 'source') from node or dictionary `dict`.
        """
        if isinstance(dict, Node):
            dict = dict.attributes
        for att in ('ids', 'classes', 'names', 'dupnames'):
            for value in dict.get(att, []):
                if value not in self[att]:
                    self[att].append(value)

    def clear(self):
        """Drop all children."""
        self.children = []

    def replace(self, old, new):
        """Replace one child `Node` with another child or children."""
        index = self.index(old)
        if isinstance(new, Node):
            self.setup_child(new)
            self[index] = new
        elif new is not None:
            self[index:index+1] = new

    def replace_self(self, new):
        """
        Replace `self` node with `new`, where `new` is a node or a
        list of nodes.
        """
        update = new
        if not isinstance(new, Node):
            # `new` is a list; update first child.
            try:
                update = new[0]
            except IndexError:
                update = None
        if isinstance(update, Element):
            update.update_basic_atts(self)
        else:
            # `update` is a Text node or `new` is an empty list.
            # Assert that we aren't losing any attributes.
            for att in ('ids', 'names', 'classes', 'dupnames'):
                assert not self[att], \
                       'Losing "%s" attribute: %s' % (att, self[att])
        self.parent.replace(self, new)

    def first_child_matching_class(self, childclass, start=0, end=sys.maxint):
        """
        Return the index of the first child that is an instance of
        `childclass` (subclass instances match as well), or None.

        Parameters:

        - `childclass`: A `Node` subclass to search for, or a tuple of `Node`
          classes. If a tuple, any of the classes may match.
        - `start`: Initial index to check.
        - `end`: Initial index to *not* check.
        """
        if not isinstance(childclass, tuple):
            childclass = (childclass,)
        for index in range(start, min(len(self), end)):
            if isinstance(self[index], childclass):
                return index
        return None

    def first_child_not_matching_class(self, childclass, start=0,
                                       end=sys.maxint):
        """
        Return the index of the first child that is *not* an instance of
        `childclass`, or None.

        Parameters:

        - `childclass`: A `Node` subclass to skip, or a tuple of `Node`
          classes. If a tuple, none of the classes may match.
        - `start`: Initial index to check.
        - `end`: Initial index to *not* check.
        """
        if not isinstance(childclass, tuple):
            childclass = (childclass,)
        for index in range(start, min(len(self), end)):
            if not isinstance(self.children[index], childclass):
                return index
        return None

    def pformat(self, indent=' ', level=0):
        """
        Return the start tag on a line indented by `level` repetitions of
        `indent`, followed by the children formatted one level deeper.
        """
        return ''.join(['%s%s\n' % (indent * level, self.starttag())] +
                       [child.pformat(indent, level+1)
                        for child in self.children])

    def copy(self):
        """Return a childless copy with the same rawsource and attributes."""
        return self.__class__(rawsource=self.rawsource, **self.attributes)

    def deepcopy(self):
        """Return a copy that also holds deep copies of all children."""
        copy = self.copy()
        copy.extend([child.deepcopy() for child in self.children])
        return copy

    def set_class(self, name):
        """Add a new class to the "classes" attribute."""
        warnings.warn('docutils.nodes.Element.set_class deprecated; '
                      "append to Element['classes'] list attribute directly",
                      DeprecationWarning, stacklevel=2)
        assert ' ' not in name
        self['classes'].append(name.lower())

    def note_referenced_by(self, name=None, id=None):
        """Note that this Element has been referenced by its name
        `name` or id `id`."""
        self.referenced = 1
        # Element.expect_referenced_by_* dictionaries map names or ids
        # to nodes whose ``referenced`` attribute is set to true as
        # soon as this node is referenced by the given name or id.
        # Needed for target propagation.
        by_name = getattr(self, 'expect_referenced_by_name', {}).get(name)
        by_id = getattr(self, 'expect_referenced_by_id', {}).get(id)
        if by_name:
            assert name is not None
            by_name.referenced = 1
        if by_id:
            assert id is not None
            by_id.referenced = 1
class TextElement(Element):

    """
    An element which directly contains text.

    Its children are all `Text` or `Inline` subclass nodes.  You can
    check whether an element's context is inline simply by checking whether
    its immediate parent is a `TextElement` instance (including subclasses).
    This is handy for nodes like `image` that can appear both inline and as
    standalone body elements.

    If passing children to `__init__()`, make sure to set `text` to
    ``''`` or some other suitable value.
    """

    child_text_separator = ''
    """Separator for child nodes, used by `astext()` method."""

    def __init__(self, rawsource='', text='', *children, **attributes):
        # A non-empty `text` becomes the first child, wrapped in `Text`.
        if text != '':
            children = (Text(text),) + children
        Element.__init__(self, rawsource, *children, **attributes)
class FixedTextElement(TextElement):

    """An element which directly contains preformatted text."""

    def __init__(self, rawsource='', text='', *children, **attributes):
        TextElement.__init__(self, rawsource, text, *children, **attributes)
        # Always set, overriding any 'xml:space' passed in `attributes`.
        own_attributes = self.attributes
        own_attributes['xml:space'] = 'preserve'
834 # ========
835 # Mixins
836 # ========
class Resolvable:

    """Mixin providing the `resolved` flag, initially false."""

    resolved = 0


class BackLinkable:

    """Mixin providing `add_backref()` on top of a 'backrefs' attribute."""

    def add_backref(self, refid):
        """Append `refid` to this node's 'backrefs' list."""
        backrefs = self['backrefs']
        backrefs.append(refid)


# ====================
#  Element Categories
# ====================

class Root:
    """Category marker class; defines no behavior."""

class Titular:
    """Category marker class; defines no behavior."""

class PreBibliographic:
    """Category of Node which may occur before Bibliographic Nodes."""

class Bibliographic:
    """Category marker class; defines no behavior."""

class Decorative(PreBibliographic):
    """Category marker class; a `PreBibliographic` category."""

class Structural:
    """Category marker class; defines no behavior."""

class Body:
    """Category marker class; defines no behavior."""

class General(Body):
    """Category marker class; a `Body` category."""

class Sequential(Body):
    """List-like elements."""

class Admonition(Body):
    """Category marker class; a `Body` category."""

class Special(Body):
    """Special internal body elements."""

class Invisible(PreBibliographic):
    """Internal elements that don't appear in output."""

class Part:
    """Category marker class; defines no behavior."""

class Inline:
    """Category marker class; defines no behavior."""

class Referential(Resolvable):
    """Category marker class; inherits the `Resolvable` mixin."""


class Targetable(Resolvable):

    """`Resolvable` category that also tracks whether it is referenced."""

    referenced = 0

    indirect_reference_name = None
    """Holds the whitespace_normalized_name (contains mixed case) of a target.
    Required for MoinMoin/reST compatibility."""


class Labeled:
    """Contains a `label` as its first element."""
901 # ==============
902 # Root Element
903 # ==============
905 class document(Root, Structural, Element):
908 The document root element.
910 Do not instantiate this class directly; use
911 `docutils.utils.new_document()` instead.
def __init__(self, settings, reporter, *args, **kwargs):
    # `settings` and `reporter` are stored as-is; the remaining
    # arguments are handed to `Element.__init__()` unchanged.
    Element.__init__(self, *args, **kwargs)

    self.current_source = None
    """Path to or description of the input source being processed."""

    self.current_line = None
    """Line number (1-based) of `current_source`."""

    self.settings = settings
    """Runtime settings data record."""

    self.reporter = reporter
    """System message generator."""

    self.indirect_targets = []
    """List of indirect target nodes."""

    self.substitution_defs = {}
    """Mapping of substitution names to substitution_definition nodes."""

    self.substitution_names = {}
    """Mapping of case-normalized substitution names to case-sensitive
    names."""

    self.refnames = {}
    """Mapping of names to lists of referencing nodes."""

    self.refids = {}
    """Mapping of ids to lists of referencing nodes."""

    self.nameids = {}
    """Mapping of names to unique id's."""

    self.nametypes = {}
    """Mapping of names to hyperlink type (boolean: True => explicit,
    False => implicit)."""

    self.ids = {}
    """Mapping of ids to nodes."""

    self.footnote_refs = {}
    """Mapping of footnote labels to lists of footnote_reference nodes."""

    self.citation_refs = {}
    """Mapping of citation labels to lists of citation_reference nodes."""

    self.autofootnotes = []
    """List of auto-numbered footnote nodes."""

    self.autofootnote_refs = []
    """List of auto-numbered footnote_reference nodes."""

    self.symbol_footnotes = []
    """List of symbol footnote nodes."""

    self.symbol_footnote_refs = []
    """List of symbol footnote_reference nodes."""

    self.footnotes = []
    """List of manually-numbered footnote nodes."""

    self.citations = []
    """List of citation nodes."""

    self.autofootnote_start = 1
    """Initial auto-numbered footnote number."""

    self.symbol_footnote_start = 0
    """Initial symbol footnote symbol index."""

    self.id_start = 1
    """Initial ID number."""

    self.parse_messages = []
    """System messages generated while parsing."""

    self.transform_messages = []
    """System messages generated while applying transforms."""

    # Imported here rather than at module level.
    import docutils.transforms
    self.transformer = docutils.transforms.Transformer(self)
    """Storage for transforms to be applied to this document."""

    self.decoration = None
    """Document's `decoration` node."""

    # Set last: the document is its own `document`.
    self.document = self
def __getstate__(self):
    """
    Return dict with unpicklable references removed.

    The result is a shallow copy of the instance dictionary in which
    'reporter' and 'transformer' are set to None.
    """
    state = dict(self.__dict__)
    for unpicklable in ('reporter', 'transformer'):
        state[unpicklable] = None
    return state
def asdom(self, dom=None):
    """
    Return a DOM representation of this document.

    `dom` is the DOM implementation module to use; it defaults to
    `xml.dom.minidom`, imported on demand.
    """
    if dom is None:
        import xml.dom.minidom as dom
    dom_document = dom.Document()
    root_element = self._dom_node(dom_document)
    dom_document.appendChild(root_element)
    return dom_document
def set_id(self, node, msgnode=None):
    """
    Make sure `node` has an ID, register it in `self.ids`, return it.

    - For every ID already present in ``node['ids']`` that `self.ids`
      maps to a different node, a "Duplicate ID" severe system message
      is generated (and added to `msgnode`, if given).
    - A node without IDs gets one derived from the first of its names
      that yields an unused ID (via `make_id()`), or else an
      auto-numbered one built from the ``id_prefix`` and
      ``auto_id_prefix`` settings and `self.id_start`.

    Only the ID returned (the new one, or the last existing one) is
    written to `self.ids`.
    """
    for node_id in node['ids']:
        if node_id in self.ids and self.ids[node_id] is not node:
            msg = self.reporter.severe('Duplicate ID: "%s".' % node_id)
            # Identity test: the truth value of `msgnode` is irrelevant.
            if msgnode is not None:
                msgnode += msg
    if not node['ids']:
        for name in node['names']:
            node_id = self.settings.id_prefix + make_id(name)
            if node_id and node_id not in self.ids:
                break
        else:
            # No name produced a usable ID: auto-number until unused.
            node_id = ''
            while not node_id or node_id in self.ids:
                node_id = (self.settings.id_prefix +
                           self.settings.auto_id_prefix + str(self.id_start))
                self.id_start += 1
        node['ids'].append(node_id)
    self.ids[node_id] = node
    return node_id
def set_name_id_map(self, node, id, msgnode=None, explicit=None):
    """
    `self.nameids` maps names to IDs, while `self.nametypes` maps names to
    booleans representing hyperlink type (True==explicit,
    False==implicit).  This method updates the mappings.

    The following state transition table shows how `self.nameids` ("ids")
    and `self.nametypes` ("types") change with new input (a call to this
    method), and what actions are performed ("implicit"-type system
    messages are INFO/1, and "explicit"-type system messages are ERROR/3):

    ====  =====  ========  ========  =======  ====  =====  =====
     Old State    Input          Action        New State   Notes
    -----------  --------  -----------------  -----------  -----
    ids   types  new type  sys.msg.  dupname  ids   types
    ====  =====  ========  ========  =======  ====  =====  =====
    -     -      explicit  -         -        new   True
    -     -      implicit  -         -        new   False
    None  False  explicit  -         -        new   True
    old   False  explicit  implicit  old      new   True
    None  True   explicit  explicit  new      None  True
    old   True   explicit  explicit  new,old  None  True   [#]_
    None  False  implicit  implicit  new      None  False
    old   False  implicit  implicit  new,old  None  False
    None  True   implicit  implicit  new      None  True
    old   True   implicit  implicit  new      old   True
    ====  =====  ========  ========  =======  ====  =====  =====

    .. [#] Do not clear the name-to-id map or invalidate the old target if
       both old and new targets are external and refer to identical URIs.
       The new target is invalidated regardless.
    """
    for name in node['names']:
        if name not in self.nameids:
            # First occurrence of this name: just record it.
            self.nameids[name] = id
            self.nametypes[name] = explicit
            continue
        self.set_duplicate_name_id(node, id, name, msgnode, explicit)
def set_duplicate_name_id(self, node, id, name, msgnode, explicit):
    """
    Handle a `name` that is already registered in `self.nameids`.

    Called by `set_name_id_map()` for every name of `node` (whose ID is
    `id`) that clashes with an earlier target.  Updates `self.nameids`
    and `self.nametypes`, marks the affected nodes with `dupname()`, and
    generates a system message, which is added to `msgnode` if that is
    not None.  `explicit` is true for explicit targets.
    """
    old_id = self.nameids[name]
    old_explicit = self.nametypes[name]
    # Once a name has been used explicitly, it stays explicit.
    self.nametypes[name] = old_explicit or explicit
    if explicit:
        if old_explicit:
            # Explicit vs. explicit.
            level = 2
            if old_id is not None:
                old_node = self.ids[old_id]
                if 'refuri' in node:
                    refuri = node['refuri']
                    if old_node['names'] \
                           and 'refuri' in old_node \
                           and old_node['refuri'] == refuri:
                        level = 1   # just inform if refuri's identical
                if level > 1:
                    dupname(old_node, name)
                    self.nameids[name] = None
            msg = self.reporter.system_message(
                level, 'Duplicate explicit target name: "%s".' % name,
                backrefs=[id], base_node=node)
            # Identity test: the truth value of `msgnode` is irrelevant.
            if msgnode is not None:
                msgnode += msg
            dupname(node, name)
        else:
            # Explicit target takes over the name from an implicit one.
            self.nameids[name] = id
            if old_id is not None:
                old_node = self.ids[old_id]
                dupname(old_node, name)
    else:
        # New target is implicit.
        if old_id is not None and not old_explicit:
            self.nameids[name] = None
            old_node = self.ids[old_id]
            dupname(old_node, name)
        dupname(node, name)
    if not explicit or (not old_explicit and old_id is not None):
        msg = self.reporter.info(
            'Duplicate implicit target name: "%s".' % name,
            backrefs=[id], base_node=node)
        if msgnode is not None:
            msgnode += msg
def has_name(self, name):
    """Return true if `name` is a key of the `self.nameids` mapping."""
    registered = self.nameids
    return name in registered
# In the ``note_...`` method names, "note" is an imperative verb:
# "take note of".
def note_implicit_target(self, target, msgnode=None):
    """
    Give `target` an ID with `set_id()` and record its names in the
    name-to-id map with ``explicit=None``.
    """
    target_id = self.set_id(target, msgnode)
    self.set_name_id_map(target, target_id, msgnode, explicit=None)
def note_explicit_target(self, target, msgnode=None):
    """
    Give `target` an ID with `set_id()` and record its names in the
    name-to-id map with ``explicit=True``.
    """
    target_id = self.set_id(target, msgnode)
    self.set_name_id_map(target, target_id, msgnode, explicit=True)
def note_refname(self, node):
    """Add `node` to the `self.refnames` list for ``node['refname']``."""
    referrers = self.refnames.setdefault(node['refname'], [])
    referrers.append(node)

def note_refid(self, node):
    """Add `node` to the `self.refids` list for ``node['refid']``."""
    referrers = self.refids.setdefault(node['refid'], [])
    referrers.append(node)
def note_indirect_target(self, target):
    """
    Append `target` to `self.indirect_targets`; if it has any names, also
    register it with `note_refname()`.
    """
    self.indirect_targets.append(target)
    if not target['names']:
        return
    self.note_refname(target)

def note_anonymous_target(self, target):
    """Give `target` an ID with `set_id()`."""
    self.set_id(target)
def note_autofootnote(self, footnote):
    """Give `footnote` an ID and append it to `self.autofootnotes`."""
    self.set_id(footnote)
    registry = self.autofootnotes
    registry.append(footnote)

def note_autofootnote_ref(self, ref):
    """Give `ref` an ID and append it to `self.autofootnote_refs`."""
    self.set_id(ref)
    registry = self.autofootnote_refs
    registry.append(ref)

def note_symbol_footnote(self, footnote):
    """Give `footnote` an ID and append it to `self.symbol_footnotes`."""
    self.set_id(footnote)
    registry = self.symbol_footnotes
    registry.append(footnote)

def note_symbol_footnote_ref(self, ref):
    """Give `ref` an ID and append it to `self.symbol_footnote_refs`."""
    self.set_id(ref)
    registry = self.symbol_footnote_refs
    registry.append(ref)
def note_footnote(self, footnote):
    """Give `footnote` an ID and append it to `self.footnotes`."""
    self.set_id(footnote)
    registry = self.footnotes
    registry.append(footnote)

def note_footnote_ref(self, ref):
    """
    Give `ref` an ID, file it in `self.footnote_refs` under its "refname",
    and register it with `note_refname()`.
    """
    self.set_id(ref)
    same_name_refs = self.footnote_refs.setdefault(ref['refname'], [])
    same_name_refs.append(ref)
    self.note_refname(ref)
def note_citation(self, citation):
    """Append `citation` to `self.citations` (no ID is assigned here)."""
    registry = self.citations
    registry.append(citation)

def note_citation_ref(self, ref):
    """
    Give `ref` an ID, file it in `self.citation_refs` under its "refname",
    and register it with `note_refname()`.
    """
    self.set_id(ref)
    same_name_refs = self.citation_refs.setdefault(ref['refname'], [])
    same_name_refs.append(ref)
    self.note_refname(ref)
def note_substitution_def(self, subdef, def_name, msgnode=None):
    """
    Record the substitution definition `subdef` under `def_name`.

    The name is whitespace-normalized before use.  If a definition with
    the same normalized name exists already, an error message is created
    (and added to `msgnode`, if given), the old definition loses the name
    via `dupname()`, and the new definition replaces it.

    Parameters:

    - `subdef`: the substitution definition node.
    - `def_name`: the name of the definition, as found in the source.
    - `msgnode`: if not None, the error message is added to it with ``+=``.
    """
    name = whitespace_normalize_name(def_name)
    if name in self.substitution_defs:
        msg = self.reporter.error(
              'Duplicate substitution definition name: "%s".' % name,
              base_node=subdef)
        # identity test: do not invoke comparison methods of the node
        if msgnode is not None:
            msgnode += msg
        oldnode = self.substitution_defs[name]
        dupname(oldnode, name)
    # keep only the last definition:
    self.substitution_defs[name] = subdef
    # case-insensitive mapping:
    self.substitution_names[fully_normalize_name(name)] = name
def note_substitution_ref(self, subref, refname):
    """Store the whitespace-normalized `refname` in ``subref['refname']``."""
    normalized = whitespace_normalize_name(refname)
    subref['refname'] = normalized
def note_pending(self, pending, priority=None):
    """Hand `pending` and `priority` to ``self.transformer.add_pending()``."""
    transformer = self.transformer
    transformer.add_pending(pending, priority)

def note_parse_message(self, message):
    """Append `message` to `self.parse_messages`."""
    collected = self.parse_messages
    collected.append(message)

def note_transform_message(self, message):
    """Append `message` to `self.transform_messages`."""
    collected = self.transform_messages
    collected.append(message)
def note_source(self, source, offset):
    """
    Store `source` in `self.current_source` and ``offset + 1`` in
    `self.current_line`; a None `offset` is stored unchanged.
    """
    self.current_source = source
    if offset is not None:
        self.current_line = offset + 1
        return
    self.current_line = None
def copy(self):
    """
    Return a new instance of the same class, built from this object's
    `settings`, `reporter`, and `attributes` (passed as keyword arguments).
    """
    cls = self.__class__
    return cls(self.settings, self.reporter, **self.attributes)
def get_decoration(self):
    """
    Return `self.decoration`, creating it on first use.

    A newly created `decoration` element is inserted before the first
    child that is not a `Titular` instance, or appended if there is no
    such child.
    """
    if not self.decoration:
        fresh = decoration()
        self.decoration = fresh
        position = self.first_child_not_matching_class(Titular)
        if position is not None:
            self.insert(position, fresh)
        else:
            self.append(fresh)
    return self.decoration
1230 # ================
1231 # Title Elements
1232 # ================
# Each class below has an empty body; all behaviour comes from its bases.

class title(Titular, PreBibliographic, TextElement):
    pass


class subtitle(Titular, PreBibliographic, TextElement):
    pass


class rubric(Titular, TextElement):
    pass
1239 # ========================
1240 # Bibliographic Elements
1241 # ========================
# Each class below has an empty body; all behaviour comes from its bases.

class docinfo(Bibliographic, Element):
    pass


class author(Bibliographic, TextElement):
    pass


class authors(Bibliographic, Element):
    pass


class organization(Bibliographic, TextElement):
    pass


class address(Bibliographic, FixedTextElement):
    pass


class contact(Bibliographic, TextElement):
    pass


class version(Bibliographic, TextElement):
    pass


class revision(Bibliographic, TextElement):
    pass


class status(Bibliographic, TextElement):
    pass


class date(Bibliographic, TextElement):
    pass


class copyright(Bibliographic, TextElement):
    pass
1256 # =====================
1257 # Decorative Elements
1258 # =====================
class decoration(Decorative, Element):

    """Container element giving access to a `header` and a `footer` child."""

    def get_header(self):
        """
        Return the first child if it is a `header`; otherwise insert a new
        `header` at position 0 and return that.
        """
        has_header = (len(self.children)
                      and isinstance(self.children[0], header))
        if not has_header:
            self.insert(0, header())
        return self.children[0]

    def get_footer(self):
        """
        Return the last child if it is a `footer`; otherwise append a new
        `footer` and return that.
        """
        has_footer = (len(self.children)
                      and isinstance(self.children[-1], footer))
        if not has_footer:
            self.append(footer())
        return self.children[-1]
# Empty subclasses; all behaviour comes from the bases.

class header(Decorative, Element):
    pass


class footer(Decorative, Element):
    pass
1277 # =====================
1278 # Structural Elements
1279 # =====================
class section(Structural, Element):
    pass


class topic(Structural, Element):

    """
    A terminal ("leaf") mini-section, comparable to a block quote with a
    title or to a textual figure.  A topic is like a section, except that
    it has no subsections and need not conform to section placement rules.

    Topics may occur wherever body elements (list, table, etc.) are
    allowed, but only at the top level of a section or document.  They
    cannot nest inside topics, sidebars, or body elements: a topic inside
    a table, list, block quote, etc. is not possible.
    """


class sidebar(Structural, Element):

    """
    A miniature, parallel document inside another document, providing
    related or reference material.  A sidebar is typically offset by a
    border and "floats" to the side of the page, with the document's main
    text possibly flowing around it.  Sidebars can also be likened to
    super-footnotes: their content is outside of the flow of the
    document's main text.

    Sidebars may occur wherever body elements (list, table, etc.) are
    allowed, but only at the top level of a section or document.  They
    cannot nest inside sidebars, topics, or body elements: a sidebar
    inside a table, list, block quote, etc. is not possible.
    """


class transition(Structural, Element):
    pass
1318 # ===============
1319 # Body Elements
1320 # ===============
# Each class below has an empty body; all behaviour comes from its bases.

class paragraph(General, TextElement):
    pass


class compound(General, Element):
    pass


class container(General, Element):
    pass


class bullet_list(Sequential, Element):
    pass


class enumerated_list(Sequential, Element):
    pass


class list_item(Part, Element):
    pass


class definition_list(Sequential, Element):
    pass


class definition_list_item(Part, Element):
    pass


class term(Part, TextElement):
    pass


class classifier(Part, TextElement):
    pass


class definition(Part, Element):
    pass


class field_list(Sequential, Element):
    pass


class field(Part, Element):
    pass


class field_name(Part, TextElement):
    pass


class field_body(Part, Element):
    pass
class option(Part, Element):

    # the text of the children is joined without any separator
    child_text_separator = ''


class option_argument(Part, TextElement):

    def astext(self):
        """
        Return the element's text, preceded by its "delimiter" attribute
        (a space if the attribute is not set).
        """
        delimiter = self.get('delimiter', ' ')
        return delimiter + TextElement.astext(self)


class option_group(Part, Element):

    child_text_separator = ', '


class option_list(Sequential, Element):
    pass


class option_list_item(Part, Element):

    # NOTE(review): runs of spaces inside string literals may have been
    # collapsed by the extraction that produced this listing -- confirm
    # the separator against the upstream file.
    child_text_separator = ' '
# Unless noted otherwise, each class below has an empty body; all
# behaviour comes from its bases.

class option_string(Part, TextElement):
    pass


class description(Part, Element):
    pass


class literal_block(General, FixedTextElement):
    pass


class doctest_block(General, FixedTextElement):
    pass


class math_block(General, FixedTextElement):
    pass


class line_block(General, Element):
    pass


class line(Part, TextElement):

    # class-level default; no value is assigned in this class
    indent = None


class block_quote(General, Element):
    pass


class attribution(Part, TextElement):
    pass


class attention(Admonition, Element):
    pass


class caution(Admonition, Element):
    pass


class danger(Admonition, Element):
    pass


class error(Admonition, Element):
    pass


class important(Admonition, Element):
    pass


class note(Admonition, Element):
    pass


class tip(Admonition, Element):
    pass


class hint(Admonition, Element):
    pass


class warning(Admonition, Element):
    pass


class admonition(Admonition, Element):
    pass


class comment(Special, Invisible, FixedTextElement):
    pass


class substitution_definition(Special, Invisible, TextElement):
    pass


class target(Special, Invisible, Inline, TextElement, Targetable):
    pass


class footnote(General, BackLinkable, Element, Labeled, Targetable):
    pass


class citation(General, BackLinkable, Element, Labeled, Targetable):
    pass


class label(Part, TextElement):
    pass


class figure(General, Element):
    pass


class caption(Part, TextElement):
    pass


class legend(Part, Element):
    pass


class table(General, Element):
    pass


class tgroup(Part, Element):
    pass


class colspec(Part, Element):
    pass


class thead(Part, Element):
    pass


class tbody(Part, Element):
    pass


class row(Part, Element):
    pass


class entry(Part, Element):
    pass
class system_message(Special, BackLinkable, PreBibliographic, Element):

    """
    System message element.

    Do not instantiate this class directly; use
    ``document.reporter.info/warning/error/severe()`` instead.
    """

    def __init__(self, message=None, *children, **attributes):
        """
        Initialize the element.

        - `message`: if true, it is wrapped in a `paragraph` that becomes
          the first child.
        - `children`: further child nodes.
        - `attributes`: element attributes, passed on to
          `Element.__init__()`.

        Any exception raised by `Element.__init__()` is re-raised after a
        diagnostic line showing the children has been written.
        """
        if message:
            p = paragraph('', message)
            children = (p,) + children
        try:
            Element.__init__(self, '', *children, **attributes)
        except:
            # Debugging aid only; the exception is re-raised unchanged, so
            # the bare ``except`` does not swallow anything.  The line goes
            # to stderr rather than stdout so that it cannot end up in the
            # middle of regular program output.
            sys.stderr.write('system_message: children=%r\n' % (children,))
            raise

    def astext(self):
        """
        Return ``source:line: (type/level) text``, built from the
        "source", "line" (empty if unset), "type", and "level" attributes
        and the element's text.
        """
        line = self.get('line', '')
        return u'%s:%s: (%s/%s) %s' % (self['source'], line, self['type'],
                                       self['level'], Element.astext(self))
class pending(Special, Invisible, Element):

    """
    Placeholder for a pending operation.

    The "pending" element encapsulates a pending operation: the operation
    (transform), the point at which to apply it, and any data it requires.
    Only the location of the operation within the document is stored in
    the public document tree (by the "pending" object itself); the
    operation and its data are stored in the object's internal instance
    attributes.

    For example, say you want a table of contents in your reStructuredText
    document.  The easiest way to specify where to put it is from within
    the document, with a directive::

        .. contents::

    But the "contents" directive can't do its work until the entire
    document has been parsed and possibly transformed to some extent.  So
    the directive code leaves a placeholder behind that will trigger the
    second phase of its processing, something like this::

        <pending ...public attributes...> + internal attributes

    Use `document.note_pending()` so that the
    `docutils.transforms.Transformer` stage of processing can run all
    pending transforms.
    """

    def __init__(self, transform, details=None,
                 rawsource='', *children, **attributes):
        Element.__init__(self, rawsource, *children, **attributes)

        # The `docutils.transforms.Transform` class implementing the
        # pending operation.
        self.transform = transform

        # Detail data (dictionary) required by the pending operation.
        self.details = details or {}

    def pformat(self, indent=' ', level=0):
        """
        Return the `Element.pformat()` output followed by a listing of the
        internal attributes (transform and details, details sorted by key).
        """
        # NOTE(review): runs of spaces inside the string literals of this
        # method (including the `indent` default) may have been collapsed
        # by the extraction that produced this listing -- confirm against
        # the upstream file.
        lines = ['.. internal attributes:',
                 ' .transform: %s.%s' % (self.transform.__module__,
                                         self.transform.__name__),
                 ' .details:']
        detail_items = list(self.details.items())
        detail_items.sort()
        for key, value in detail_items:
            if isinstance(value, Node):
                subnodes = [value]
            elif (value and isinstance(value, list)
                  and isinstance(value[0], Node)):
                subnodes = value
            else:
                # plain data: show its repr on the same line as the key
                lines.append('%7s%s: %r' % ('', key, value))
                continue
            lines.append('%7s%s:' % ('', key))
            for subnode in subnodes:
                for text in subnode.pformat().splitlines():
                    lines.append('%9s%s' % ('', text))
        head = Element.pformat(self, indent, level)
        prefix = indent * level
        tail = ''.join([' %s%s\n' % (prefix, text) for text in lines])
        return head + tail

    def copy(self):
        """
        Return a new instance of the same class with the same transform,
        details, raw source, and attributes (children are not passed on).
        """
        cls = self.__class__
        return cls(self.transform, self.details, self.rawsource,
                   **self.attributes)
class raw(Special, Inline, PreBibliographic, FixedTextElement):

    """Raw data that is to be passed untouched to the Writer."""
1509 # =================
1510 # Inline Elements
1511 # =================
# Unless noted otherwise, each class below has an empty body; all
# behaviour comes from its bases.

class emphasis(Inline, TextElement):
    pass


class strong(Inline, TextElement):
    pass


class literal(Inline, TextElement):
    pass


class reference(General, Inline, Referential, TextElement):
    pass


class footnote_reference(Inline, Referential, TextElement):
    pass


class citation_reference(Inline, Referential, TextElement):
    pass


class substitution_reference(Inline, TextElement):
    pass


class title_reference(Inline, TextElement):
    pass


class abbreviation(Inline, TextElement):
    pass


class acronym(Inline, TextElement):
    pass


class superscript(Inline, TextElement):
    pass


class subscript(Inline, TextElement):
    pass


class math(Inline, TextElement):
    pass


class image(General, Inline, Element):

    def astext(self):
        """Return the "alt" attribute, or an empty string if it is unset."""
        alternative_text = self.get('alt', '')
        return alternative_text


class inline(Inline, TextElement):
    pass


class problematic(Inline, TextElement):
    pass


class generated(Inline, TextElement):
    pass
1539 # ========================================
1540 # Auxiliary Classes, Functions, and Data
1541 # ========================================
# A list of names of all concrete Node subclasses.  The names are kept in
# one whitespace-separated string and split into a list at import time.
_node_class_names_text = """
    Text
    abbreviation acronym address admonition attention attribution author
        authors
    block_quote bullet_list
    caption caution citation citation_reference classifier colspec comment
        compound contact container copyright
    danger date decoration definition definition_list definition_list_item
        description docinfo doctest_block document
    emphasis entry enumerated_list error
    field field_body field_list field_name figure footer
        footnote footnote_reference
    generated
    header hint
    image important inline
    label legend line line_block list_item literal literal_block
    math math_block
    note
    option option_argument option_group option_list option_list_item
        option_string organization
    paragraph pending problematic
    raw reference revision row rubric
    section sidebar status strong subscript substitution_definition
        substitution_reference subtitle superscript system_message
    table target tbody term tgroup thead tip title title_reference topic
        transition
    version
    warning"""
node_class_names = _node_class_names_text.split()
class NodeVisitor:

    """
    "Visitor" pattern [GoF95]_ abstract superclass implementation for
    document tree traversals.

    Each node class has corresponding methods, doing nothing by
    default; override individual methods for specific and useful
    behaviour.  The `dispatch_visit()` method is called by
    `Node.walk()` upon entering a node.  `Node.walkabout()` also calls
    the `dispatch_departure()` method before exiting a node.

    The dispatch methods call "``visit_`` + node class name" or
    "``depart_`` + node class name", resp.

    This is a base class for visitors whose ``visit_...`` & ``depart_...``
    methods should be implemented for *all* node types encountered (such as
    for `docutils.writers.Writer` subclasses).  Unimplemented methods will
    raise exceptions.

    For sparse traversals, where only certain node types are of interest,
    subclass `SparseNodeVisitor` instead.  When (mostly or entirely) uniform
    processing is desired, subclass `GenericNodeVisitor`.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    # Tuple containing node class names (as strings).
    #
    # No exception will be raised if writers do not implement visit
    # or departure functions for these node classes.
    #
    # Used to ensure transitional compatibility with existing 3rd-party
    # writers.
    optional = ()

    def __init__(self, document):
        self.document = document

    def dispatch_visit(self, node):
        """
        Call self."``visit_`` + node class name" with `node` as
        parameter.  If the ``visit_...`` method does not exist, call
        self.unknown_visit.
        """
        class_name = node.__class__.__name__
        handler = getattr(self, 'visit_' + class_name, self.unknown_visit)
        debug_text = (
            'docutils.nodes.NodeVisitor.dispatch_visit calling %s for %s'
            % (handler.__name__, class_name))
        self.document.reporter.debug(debug_text)
        return handler(node)

    def dispatch_departure(self, node):
        """
        Call self."``depart_`` + node class name" with `node` as
        parameter.  If the ``depart_...`` method does not exist, call
        self.unknown_departure.
        """
        class_name = node.__class__.__name__
        handler = getattr(self, 'depart_' + class_name,
                          self.unknown_departure)
        debug_text = (
            'docutils.nodes.NodeVisitor.dispatch_departure calling %s for %s'
            % (handler.__name__, class_name))
        self.document.reporter.debug(debug_text)
        return handler(node)

    def unknown_visit(self, node):
        """
        Called when entering unknown `Node` types.

        Raise an exception unless overridden.  Nothing is raised if the
        "strict_visitor" setting is false and the node class name is
        listed in `optional`.
        """
        if (not self.document.settings.strict_visitor
            and node.__class__.__name__ in self.optional):
            return
        raise NotImplementedError(
            '%s visiting unknown node type: %s'
            % (self.__class__, node.__class__.__name__))

    def unknown_departure(self, node):
        """
        Called before exiting unknown `Node` types.

        Raise exception unless overridden.  Nothing is raised if the
        "strict_visitor" setting is false and the node class name is
        listed in `optional`.
        """
        if (not self.document.settings.strict_visitor
            and node.__class__.__name__ in self.optional):
            return
        raise NotImplementedError(
            '%s departing unknown node type: %s'
            % (self.__class__, node.__class__.__name__))
class SparseNodeVisitor(NodeVisitor):

    """
    Base class for sparse traversals, i.e. traversals in which only
    certain node types are of interest.

    Subclass `NodeVisitor` instead when ``visit_...`` & ``depart_...``
    methods should be implemented for *all* node types (such as for
    `docutils.writers.Writer` subclasses).
    """
class GenericNodeVisitor(NodeVisitor):

    """
    Generic "Visitor" abstract superclass, for simple traversals.

    Unless overridden, each ``visit_...`` method calls `default_visit()`,
    and each ``depart_...`` method (when using `Node.walkabout()`) calls
    `default_departure()`.  `default_visit()` (and `default_departure()`)
    must be overridden in subclasses.

    Define fully generic visitors by overriding `default_visit()` (and
    `default_departure()`) only.  Define semi-generic visitors by
    overriding individual ``visit_...()`` (and ``depart_...()``) methods
    also.

    `NodeVisitor.unknown_visit()` (`NodeVisitor.unknown_departure()`)
    should be overridden for default behavior.
    """

    def default_visit(self, node):
        """Override for generic, uniform traversals."""
        raise NotImplementedError()

    def default_departure(self, node):
        """Override for generic, uniform traversals."""
        raise NotImplementedError()
# The three functions below take `self` because they are installed as
# methods on the visitor classes by `_add_node_class_names()`.

def _call_default_visit(self, node):
    """Forward `node` to ``self.default_visit()``; return None."""
    handler = self.default_visit
    handler(node)

def _call_default_departure(self, node):
    """Forward `node` to ``self.default_departure()``; return None."""
    handler = self.default_departure
    handler(node)

def _nop(self, node):
    """Do nothing."""
    return None
def _add_node_class_names(names):
    """
    Save typing with dynamic assignments: for each name, install
    ``visit_<name>`` and ``depart_<name>`` methods on `GenericNodeVisitor`
    (calling the default handlers) and on `SparseNodeVisitor` (no-ops).
    """
    assignments = (
        (GenericNodeVisitor, "visit_", _call_default_visit),
        (GenericNodeVisitor, "depart_", _call_default_departure),
        (SparseNodeVisitor, 'visit_', _nop),
        (SparseNodeVisitor, 'depart_', _nop),
    )
    for _name in names:
        for visitor_class, prefix, function in assignments:
            setattr(visitor_class, prefix + _name, function)

_add_node_class_names(node_class_names)
class TreeCopyVisitor(GenericNodeVisitor):

    """
    Make a complete copy of a tree or branch, including element attributes.
    """

    def __init__(self, document):
        GenericNodeVisitor.__init__(self, document)
        # stack of the acting parents that have been left for a child
        self.parent_stack = []
        # acting parent; starts out as a plain list receiving the root copy
        self.parent = []

    def get_tree_copy(self):
        """Return the first item of the current acting parent."""
        return self.parent[0]

    def default_visit(self, node):
        """Copy the current node, and make it the new acting parent."""
        acting_parent = self.parent
        duplicate = node.copy()
        acting_parent.append(duplicate)
        self.parent_stack.append(acting_parent)
        self.parent = duplicate

    def default_departure(self, node):
        """Restore the previous acting parent."""
        previous = self.parent_stack.pop()
        self.parent = previous
class TreePruningException(Exception):

    """
    Base class for `NodeVisitor`-related tree pruning exceptions.

    Raise subclasses from within ``visit_...`` or ``depart_...`` methods
    called from `Node.walk()` and `Node.walkabout()` tree traversals to
    prune the tree traversed.
    """


class SkipChildren(TreePruningException):

    """
    Do not visit any children of the current node.  The current node's
    siblings and ``depart_...`` method are not affected.
    """


class SkipSiblings(TreePruningException):

    """
    Do not visit any more siblings (to the right) of the current node.  The
    current node's children and its ``depart_...`` method are not affected.
    """


class SkipNode(TreePruningException):

    """
    Do not visit the current node's children, and do not call the current
    node's ``depart_...`` method.
    """


class SkipDeparture(TreePruningException):

    """
    Do not call the current node's ``depart_...`` method.  The current
    node's children and siblings are not affected.
    """


class NodeFound(TreePruningException):

    """
    Raise to indicate that the target of a search has been found.  This
    exception must be caught by the client; it is not caught by the
    traversal code.
    """


class StopTraversal(TreePruningException):

    """
    Stop the traversal altogether.  The current node's ``depart_...``
    method is not affected.  The parent nodes' ``depart_...`` methods are
    also called as usual.  No other nodes are visited.  This is an
    alternative to NodeFound that does not cause exception handling to
    trickle up to the caller.
    """
def make_id(string):
    """
    Convert `string` into an identifier and return it.

    Docutils identifiers will conform to the regular expression
    ``[a-z](-?[a-z0-9]+)*``.  For CSS compatibility, identifiers (the
    "class" and "id" attributes) should have no underscores, colons, or
    periods.  Hyphens may be used.

    - The `HTML 4.01 spec`_ defines identifiers based on SGML tokens:

          ID and NAME tokens must begin with a letter ([A-Za-z]) and may be
          followed by any number of letters, digits ([0-9]), hyphens ("-"),
          underscores ("_"), colons (":"), and periods (".").

    - However the `CSS1 spec`_ defines identifiers based on the "name"
      token, a tighter interpretation ("flex" tokenizer notation; "latin1"
      and "escape" 8-bit characters have been replaced with entities)::

          unicode     \\[0-9a-f]{1,4}
          latin1      [&iexcl;-&yuml;]
          escape      {unicode}|\\[ -~&iexcl;-&yuml;]
          nmchar      [-a-z0-9]|{latin1}|{escape}
          name        {nmchar}+

    The CSS1 "nmchar" rule does not include underscores ("_"), colons
    (":"), or periods ("."), therefore "class" and "id" attributes should
    not contain these characters.  They should be replaced with hyphens
    ("-").  Combined with HTML's requirements (the first character must be
    a letter; no "unicode", "latin1", or "escape" characters), this results
    in the ``[a-z](-?[a-z0-9]+)*`` pattern.

    .. _HTML 4.01 spec: http://www.w3.org/TR/html401
    .. _CSS1 spec: http://www.w3.org/TR/REC-CSS1
    """
    text = string.lower()
    if not isinstance(text, unicode):
        text = text.decode()
    # replace special letters: first those that map to two characters,
    # then those that map to a single one
    text = text.translate(_non_id_translate_digraphs)
    text = text.translate(_non_id_translate)
    # get rid of non-ascii characters.
    # 'ascii' lowercase to prevent problems with turkish locale.
    decomposed = unicodedata.normalize('NFKD', text)
    text = decomposed.encode('ascii', 'ignore').decode('ascii')
    # shrink runs of whitespace and replace by hyphen
    single_spaced = ' '.join(text.split())
    text = _non_id_chars.sub('-', single_spaced)
    # strip leading hyphens/digits and trailing hyphens
    text = _non_id_at_ends.sub('', text)
    return str(text)
# Patterns and translation tables used by `make_id()`.

# any run of characters other than lowercase ASCII letters and digits
_non_id_chars = re.compile(r'[^a-z0-9]+')
# hyphens/digits at the start, hyphens at the end
_non_id_at_ends = re.compile(r'^[-0-9]+|-+$')

# code point -> single ASCII letter
_non_id_translate = {
    0x00f8: u'o',       # o with stroke
    0x0111: u'd',       # d with stroke
    0x0127: u'h',       # h with stroke
    0x0131: u'i',       # dotless i
    0x0142: u'l',       # l with stroke
    0x0167: u't',       # t with stroke
    0x0180: u'b',       # b with stroke
    0x0183: u'b',       # b with topbar
    0x0188: u'c',       # c with hook
    0x018c: u'd',       # d with topbar
    0x0192: u'f',       # f with hook
    0x0199: u'k',       # k with hook
    0x019a: u'l',       # l with bar
    0x019e: u'n',       # n with long right leg
    0x01a5: u'p',       # p with hook
    0x01ab: u't',       # t with palatal hook
    0x01ad: u't',       # t with hook
    0x01b4: u'y',       # y with hook
    0x01b6: u'z',       # z with stroke
    0x01e5: u'g',       # g with stroke
    0x0225: u'z',       # z with hook
    0x0234: u'l',       # l with curl
    0x0235: u'n',       # n with curl
    0x0236: u't',       # t with curl
    0x0237: u'j',       # dotless j
    0x023c: u'c',       # c with stroke
    0x023f: u's',       # s with swash tail
    0x0240: u'z',       # z with swash tail
    0x0247: u'e',       # e with stroke
    0x0249: u'j',       # j with stroke
    0x024b: u'q',       # q with hook tail
    0x024d: u'r',       # r with stroke
    0x024f: u'y',       # y with stroke
}

# code point -> two ASCII letters
_non_id_translate_digraphs = {
    0x00df: u'sz',      # ligature sz
    0x00e6: u'ae',      # ae
    0x0153: u'oe',      # ligature oe
    0x0238: u'db',      # db digraph
    0x0239: u'qp',      # qp digraph
}
def dupname(node, name):
    """
    Move `name` from ``node['names']`` to ``node['dupnames']`` and mark
    `node` as referenced.
    """
    duplicates = node['dupnames']
    duplicates.append(name)
    active_names = node['names']
    active_names.remove(name)
    # Assume that this method is referenced, even though it isn't; we
    # don't want to throw unnecessary system_messages.
    node.referenced = 1
def fully_normalize_name(name):
    """Return a case- and whitespace-normalized name."""
    words = name.lower().split()
    return ' '.join(words)
def whitespace_normalize_name(name):
    """Return a whitespace-normalized name."""
    words = name.split()
    return ' '.join(words)
def serial_escape(value):
    """Escape string values that are elements of a list, for serialization."""
    # backslashes must be doubled first, so that the backslashes added for
    # the spaces are not doubled again
    backslashes_doubled = value.replace('\\', r'\\')
    return backslashes_doubled.replace(' ', r'\ ')
def pseudo_quoteattr(value):
    """Quote attributes for pseudo-xml"""
    # the value is only wrapped in double quotes; nothing is escaped
    quoted = '"%s"' % value
    return quoted
1943 # \f
1945 # Local Variables:
1946 # indent-tabs-mode: nil
1947 # sentence-end-double-space: t
1948 # fill-column: 78
1949 # End: