Allow also non-ASCII whitespace characters around inline markup.
[docutils.git] / docutils / nodes.py
blob9a868cd1b40a4c03ad3632d62c95c4b7fb200ebe
1 # $Id$
2 # Author: David Goodger <goodger@python.org>
3 # Copyright: This module has been placed in the public domain.
5 """
6 Docutils document tree element class library.
8 Classes in CamelCase are abstract base classes or auxiliary classes. The one
9 exception is `Text`, for a text (PCDATA) node; uppercase is used to
10 differentiate from element classes. Classes in lower_case_with_underscores
11 are element classes, matching the XML element generic identifiers in the DTD_.
13 The position of each node (the level at which it can occur) is significant and
14 is represented by abstract base classes (`Root`, `Structural`, `Body`,
15 `Inline`, etc.). Certain transformations will be easier because we can use
16 ``isinstance(node, base_class)`` to determine the position of the node in the
17 hierarchy.
19 .. _DTD: http://docutils.sourceforge.net/docs/ref/docutils.dtd
20 """
22 __docformat__ = 'reStructuredText'
24 import sys
25 import os
26 import re
27 import warnings
28 import types
29 import unicodedata
31 # ==============================
32 # Functional Node Base Classes
33 # ==============================
class Node(object):

    """Abstract base class of nodes in a document tree."""

    parent = None
    """Back-reference to the Node immediately containing this Node."""

    document = None
    """The `document` node at the root of the tree containing this Node."""

    source = None
    """Path or description of the input source which generated this Node."""

    line = None
    """The line number (1-based) of the beginning of this Node in `source`."""

    def __nonzero__(self):
        """
        Node instances are always true, even if they're empty.  A node is more
        than a simple container.  Its boolean "truth" does not depend on
        having one or more subnodes in the doctree.

        Use `len()` to check node length.  Use `None` to represent a boolean
        false value.
        """
        return True

    if sys.version_info < (3,):
        # on 2.x, str(node) will be a byte string with Unicode
        # characters > 255 escaped; on 3.x this is no longer necessary
        def __str__(self):
            return unicode(self).encode('raw_unicode_escape')

    def asdom(self, dom=None):
        """Return a DOM **fragment** representation of this Node.

        `dom` is a DOM implementation module providing ``Document()``;
        ``xml.dom.minidom`` is used when it is None.
        """
        if dom is None:
            import xml.dom.minidom as dom
        domroot = dom.Document()
        return self._dom_node(domroot)

    def pformat(self, indent='    ', level=0):
        """
        Return an indented pseudo-XML representation, for test purposes.

        Override in subclasses.
        """
        raise NotImplementedError

    def copy(self):
        """Return a copy of self."""
        raise NotImplementedError

    def deepcopy(self):
        """Return a deep copy of self (also copying children)."""
        raise NotImplementedError

    def setup_child(self, child):
        """Make `child` point back at this node.

        Sets ``child.parent`` and, when this node already belongs to a
        document, ``child.document``.  A missing ``source``/``line`` on the
        child is filled in from the document's current source and line.
        """
        child.parent = self
        if self.document:
            child.document = self.document
            if child.source is None:
                child.source = self.document.current_source
            if child.line is None:
                child.line = self.document.current_line

    def walk(self, visitor):
        """
        Traverse a tree of `Node` objects, calling the
        `dispatch_visit()` method of `visitor` when entering each
        node.  (The `walkabout()` method is similar, except it also
        calls the `dispatch_departure()` method before exiting each
        node.)

        This tree traversal supports limited in-place tree
        modifications.  Replacing one node with one or more nodes is
        OK, as is removing an element.  However, if the node removed
        or replaced occurs after the current node, the old node will
        still be traversed, and any new nodes will not.

        Within ``visit`` methods (and ``depart`` methods for
        `walkabout()`), `TreePruningException` subclasses may be raised
        (`SkipChildren`, `SkipSiblings`, `SkipNode`, `SkipDeparture`).

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` implementation for each `Node` subclass encountered.

        Return true if we should stop the traversal.
        """
        stop = 0
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walk calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except (SkipChildren, SkipNode):
                return stop
            except SkipDeparture:           # not applicable; ignore
                pass
            children = self.children
            try:
                # Iterate over a copy: visitors may modify the child list.
                for child in children[:]:
                    if child.walk(visitor):
                        stop = 1
                        break
            except SkipSiblings:
                pass
        except StopTraversal:
            stop = 1
        return stop

    def walkabout(self, visitor):
        """
        Perform a tree traversal similarly to `Node.walk()` (which
        see), except also call the `dispatch_departure()` method
        before exiting each node.

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit`` and ``depart`` implementation for each `Node`
        subclass encountered.

        Return true if we should stop the traversal.
        """
        call_depart = 1
        stop = 0
        visitor.document.reporter.debug(
            'docutils.nodes.Node.walkabout calling dispatch_visit for %s'
            % self.__class__.__name__)
        try:
            try:
                visitor.dispatch_visit(self)
            except SkipNode:
                return stop
            except SkipDeparture:
                call_depart = 0
            children = self.children
            try:
                # Iterate over a copy: visitors may modify the child list.
                for child in children[:]:
                    if child.walkabout(visitor):
                        stop = 1
                        break
            except SkipSiblings:
                pass
        except SkipChildren:
            pass
        except StopTraversal:
            stop = 1
        if call_depart:
            visitor.document.reporter.debug(
                'docutils.nodes.Node.walkabout calling dispatch_departure '
                'for %s' % self.__class__.__name__)
            visitor.dispatch_departure(self)
        return stop

    def _fast_traverse(self, cls):
        """Specialized traverse() that only supports instance checks."""
        result = []
        if isinstance(self, cls):
            result.append(self)
        for child in self.children:
            result.extend(child._fast_traverse(cls))
        return result

    def _all_traverse(self):
        """Specialized traverse() that doesn't check for a condition."""
        result = []
        result.append(self)
        for child in self.children:
            result.extend(child._all_traverse())
        return result

    def _index_in_parent(self):
        """Return the position of *this very node* in ``self.parent``.

        ``parent.index()`` compares by equality, and nodes that compare by
        value (such as text nodes with identical content) would make it
        report the first equal sibling.  Starting from that position, look
        for the entry that is identical to `self`; if none is found, fall
        back to the position reported by ``parent.index()``.
        """
        parent = self.parent
        first_equal = parent.index(self)
        for position in range(first_equal, len(parent)):
            if parent[position] is self:
                return position
        return first_equal

    def traverse(self, condition=None,
                 include_self=1, descend=1, siblings=0, ascend=0):
        """
        Return an iterable containing

        * self (if include_self is true)
        * all descendants in tree traversal order (if descend is true)
        * all siblings (if siblings is true) and their descendants (if
          also descend is true)
        * the siblings of the parent (if ascend is true) and their
          descendants (if also descend is true), and so on

        If `condition` is not None, the iterable contains only nodes
        for which ``condition(node)`` is true.  If `condition` is a
        node class ``cls``, it is equivalent to a function consisting
        of ``return isinstance(node, cls)``.

        If ascend is true, assume siblings to be true as well.

        For example, given the following tree::

            <paragraph>
                <emphasis>      <--- emphasis.traverse() and
                    <strong>    <--- strong.traverse() are called.
                        Foo
                    Bar
                <reference name="Baz" refid="baz">
                    Baz

        Then list(emphasis.traverse()) equals ::

            [<emphasis>, <strong>, <#text: Foo>, <#text: Bar>]

        and list(strong.traverse(ascend=1)) equals ::

            [<strong>, <#text: Foo>, <#text: Bar>, <reference>, <#text: Baz>]
        """
        if ascend:
            siblings = 1
        # Old-style classes (``types.ClassType``) only exist on Python 2;
        # new-style classes are instances of ``type`` everywhere.
        class_types = (type, getattr(types, 'ClassType', type))
        condition_is_class = isinstance(condition, class_types)
        # Check for special argument combinations that allow using an
        # optimized version of traverse()
        if include_self and descend and not siblings:
            if condition is None:
                return self._all_traverse()
            elif condition_is_class:
                return self._fast_traverse(condition)
        if condition_is_class:
            # Turn the class into an equivalent predicate function.
            node_class = condition
            def condition(node, node_class=node_class):
                return isinstance(node, node_class)
        r = []
        if include_self and (condition is None or condition(self)):
            r.append(self)
        if descend and len(self.children):
            for child in self:
                r.extend(child.traverse(
                    include_self=1, descend=1, siblings=0, ascend=0,
                    condition=condition))
        if siblings or ascend:
            node = self
            while node.parent:
                # Identity-based lookup: see `_index_in_parent()`.
                index = node._index_in_parent()
                for sibling in node.parent[index+1:]:
                    r.extend(sibling.traverse(include_self=1, descend=descend,
                                              siblings=0, ascend=0,
                                              condition=condition))
                if not ascend:
                    break
                else:
                    node = node.parent
        return r

    def next_node(self, condition=None,
                  include_self=0, descend=1, siblings=0, ascend=0):
        """
        Return the first node in the iterable returned by traverse(),
        or None if the iterable is empty.

        Parameter list is the same as of traverse.  Note that
        include_self defaults to 0, though.
        """
        iterable = self.traverse(condition=condition,
                                 include_self=include_self, descend=descend,
                                 siblings=siblings, ascend=ascend)
        try:
            return iterable[0]
        except IndexError:
            return None
if sys.version_info >= (3,):
    # No ``u`` prefix in the repr of text strings: use the type unchanged.
    reprunicode = unicode
else:
    class reprunicode(unicode):
        """
        A unicode subclass whose repr omits the leading ``u`` prefix.
        """

        def __repr__(self):
            prefixed = unicode.__repr__(self)
            return prefixed[1:]
class Text(Node, reprunicode):

    """
    Terminal node (leaf) that holds text only: it has neither child nodes
    nor attributes.  Construct it from a string and read the text back with
    the `astext` method.
    """

    tagname = '#text'

    children = ()
    """Text nodes have no children, and cannot have children."""

    def __new__(cls, data, rawsource=None):
        """Prevent the rawsource argument from propagating to str."""
        # The bytes check applies on Python 3 only (on 2.x ``bytes`` is
        # the ordinary ``str`` type, which is accepted).
        if sys.version_info > (3,) and isinstance(data, bytes):
            raise TypeError('expecting str data, not bytes')
        return reprunicode.__new__(cls, data)

    def __init__(self, data, rawsource=''):

        self.rawsource = rawsource
        """The raw text from which this element was constructed."""

    def shortrepr(self, maxlen=18):
        """Return ``<#text: '...'>``, abbreviating text longer than `maxlen`."""
        shown = self
        if len(shown) > maxlen:
            shown = shown[:maxlen-4] + ' ...'
        quoted = repr(reprunicode(shown))
        return '<%s: %s>' % (self.tagname, quoted)

    def __repr__(self):
        return self.shortrepr(maxlen=68)

    def _dom_node(self, domroot):
        """Return a DOM text node created by `domroot` for this text."""
        return domroot.createTextNode(unicode(self))

    def astext(self):
        """Return the text as a plain `reprunicode` string."""
        return reprunicode(self)

    # Note about __unicode__: The implementation of __unicode__ here,
    # and the one raising NotImplemented in the superclass Node had
    # to be removed when changing Text to a subclass of unicode instead
    # of UserString, since there is no way to delegate the __unicode__
    # call to the superclass unicode:
    # unicode itself does not have __unicode__ method to delegate to
    # and calling unicode(self) or unicode.__new__ directly creates
    # an infinite loop

    def copy(self):
        """Return a new node of the same class, text and rawsource."""
        text = reprunicode(self)
        return self.__class__(text, rawsource=self.rawsource)

    def deepcopy(self):
        """Same as `copy()`: a text node has no children to copy."""
        return self.copy()

    def pformat(self, indent='    ', level=0):
        """Return the text with every line indented by `level` indents."""
        prefix = indent * level
        return ''.join([prefix + line + '\n' for line in self.splitlines()])

    # rstrip and lstrip are used by substitution definitions where
    # they are expected to return a Text instance, this was formerly
    # taken care of by UserString. Note that then and now the
    # rawsource member is lost.

    def rstrip(self, chars=None):
        stripped = reprunicode.rstrip(self, chars)
        return self.__class__(stripped)

    def lstrip(self, chars=None):
        stripped = reprunicode.lstrip(self, chars)
        return self.__class__(stripped)
class Element(Node):

    """
    `Element` is the superclass to all specific elements.

    Elements contain attributes and child nodes.  Elements emulate
    dictionaries for attributes, indexing by attribute name (a string).  To
    set the attribute 'att' to 'value', do::

        element['att'] = 'value'

    There are two special attributes: 'ids' and 'names'.  Both are
    lists of unique identifiers, and names serve as human interfaces
    to IDs.  Names are case- and whitespace-normalized (see the
    fully_normalize_name() function), and IDs conform to the regular
    expression ``[a-z](-?[a-z0-9]+)*`` (see the make_id() function).

    Elements also emulate lists for child nodes (element nodes and/or text
    nodes), indexing by integer.  To get the first child node, use::

        element[0]

    Elements may be constructed using the ``+=`` operator.  To add one new
    child node to element, do::

        element += node

    This is equivalent to ``element.append(node)``.

    To add a list of multiple child nodes at once, use the same ``+=``
    operator::

        element += [node1, node2]

    This is equivalent to ``element.extend([node1, node2])``.
    """

    list_attributes = ('ids', 'classes', 'names', 'dupnames', 'backrefs')
    """List attributes, automatically initialized to empty lists for
    all nodes."""

    tagname = None
    """The element generic identifier. If None, it is set as an instance
    attribute to the name of the class."""

    child_text_separator = '\n\n'
    """Separator for child nodes, used by `astext()` method."""

    def __init__(self, rawsource='', *children, **attributes):
        self.rawsource = rawsource
        """The raw text from which this element was constructed."""

        self.children = []
        """List of child nodes (elements and/or `Text`)."""

        self.extend(children)           # maintain parent info

        self.attributes = {}
        """Dictionary of attribute {name: value}."""

        # Initialize list attributes.
        for att in self.list_attributes:
            self.attributes[att] = []

        for att, value in attributes.items():
            att = att.lower()
            if att in self.list_attributes:
                # mutable list; make a copy for this node
                self.attributes[att] = value[:]
            else:
                self.attributes[att] = value

        if self.tagname is None:
            self.tagname = self.__class__.__name__

    def _dom_node(self, domroot):
        """Return a DOM element created by `domroot`, children included."""
        element = domroot.createElement(self.tagname)
        for attribute, value in self.attlist():
            if isinstance(value, list):
                value = ' '.join([serial_escape('%s' % (v,)) for v in value])
            element.setAttribute(attribute, '%s' % value)
        for child in self.children:
            element.appendChild(child._dom_node(domroot))
        return element

    def __repr__(self):
        data = ''
        for c in self.children:
            data += c.shortrepr()
            if len(data) > 60:
                data = data[:56] + ' ...'
                break
        if self['names']:
            return '<%s "%s": %s>' % (self.__class__.__name__,
                                      '; '.join(self['names']), data)
        else:
            return '<%s: %s>' % (self.__class__.__name__, data)

    def shortrepr(self):
        if self['names']:
            return '<%s "%s"...>' % (self.__class__.__name__,
                                     '; '.join(self['names']))
        else:
            return '<%s...>' % self.tagname

    def __unicode__(self):
        if self.children:
            return u'%s%s%s' % (self.starttag(),
                                ''.join([unicode(c) for c in self.children]),
                                self.endtag())
        else:
            return self.emptytag()

    if sys.version_info > (3,):
        # 2to3 doesn't convert __unicode__ to __str__
        __str__ = __unicode__

    def starttag(self):
        """Return the start tag, listing all non-default attributes."""
        parts = [self.tagname]
        for name, value in self.attlist():
            if value is None:           # boolean attribute
                parts.append(name)
            elif isinstance(value, list):
                values = [serial_escape('%s' % (v,)) for v in value]
                parts.append('%s="%s"' % (name, ' '.join(values)))
            else:
                parts.append('%s="%s"' % (name, value))
        return '<%s>' % ' '.join(parts)

    def endtag(self):
        return '</%s>' % self.tagname

    def emptytag(self):
        return u'<%s/>' % ' '.join([self.tagname] +
                                    ['%s="%s"' % (n, v)
                                     for n, v in self.attlist()])

    def __len__(self):
        return len(self.children)

    def __contains__(self, key):
        # support both membership test for children and attributes
        # (has_key is translated to "in" by 2to3)
        if isinstance(key, basestring):
            return key in self.attributes
        return key in self.children

    def __getitem__(self, key):
        if isinstance(key, basestring):
            return self.attributes[key]
        elif isinstance(key, int):
            return self.children[key]
        elif isinstance(key, types.SliceType):
            assert key.step in (None, 1), 'cannot handle slice with stride'
            return self.children[key.start:key.stop]
        else:
            raise TypeError('element index must be an integer, a slice, or '
                            'an attribute name string')

    def __setitem__(self, key, item):
        if isinstance(key, basestring):
            self.attributes[str(key)] = item
        elif isinstance(key, int):
            self.setup_child(item)
            self.children[key] = item
        elif isinstance(key, types.SliceType):
            assert key.step in (None, 1), 'cannot handle slice with stride'
            for node in item:
                self.setup_child(node)
            self.children[key.start:key.stop] = item
        else:
            raise TypeError('element index must be an integer, a slice, or '
                            'an attribute name string')

    def __delitem__(self, key):
        if isinstance(key, basestring):
            del self.attributes[key]
        elif isinstance(key, int):
            del self.children[key]
        elif isinstance(key, types.SliceType):
            assert key.step in (None, 1), 'cannot handle slice with stride'
            del self.children[key.start:key.stop]
        else:
            raise TypeError('element index must be an integer, a simple '
                            'slice, or an attribute name string')

    def __add__(self, other):
        return self.children + other

    def __radd__(self, other):
        return other + self.children

    def __iadd__(self, other):
        """Append a node or a list of nodes to `self.children`."""
        if isinstance(other, Node):
            self.append(other)
        elif other is not None:
            self.extend(other)
        return self

    def astext(self):
        """Return the children's text joined by `child_text_separator`."""
        return self.child_text_separator.join(
              [child.astext() for child in self.children])

    def non_default_attributes(self):
        """Return a dict of the attributes `is_not_default()` accepts."""
        atts = {}
        for key, value in self.attributes.items():
            if self.is_not_default(key):
                atts[key] = value
        return atts

    def attlist(self):
        """Return the non-default attributes as a sorted (name, value) list."""
        return sorted(self.non_default_attributes().items())

    def get(self, key, failobj=None):
        return self.attributes.get(key, failobj)

    def hasattr(self, attr):
        return attr in self.attributes

    def delattr(self, attr):
        if attr in self.attributes:
            del self.attributes[attr]

    def setdefault(self, key, failobj=None):
        return self.attributes.setdefault(key, failobj)

    has_key = hasattr

    # NOTE: there must be no ``__contains__ = hasattr`` alias here.  It
    # would replace the `__contains__()` method defined above, so that
    # ``child in element`` could never test for child nodes.

    def append(self, item):
        self.setup_child(item)
        self.children.append(item)

    def extend(self, item):
        for node in item:
            self.append(node)

    def insert(self, index, item):
        if isinstance(item, Node):
            self.setup_child(item)
            self.children.insert(index, item)
        elif item is not None:
            self[index:index] = item

    def pop(self, i=-1):
        return self.children.pop(i)

    def remove(self, item):
        self.children.remove(item)

    def index(self, item):
        return self.children.index(item)

    def is_not_default(self, key):
        """Return 0 for an empty list attribute, 1 for anything else."""
        if self[key] == [] and key in self.list_attributes:
            return 0
        else:
            return 1

    def update_basic_atts(self, dict):
        """
        Update basic attributes ('ids', 'names', 'classes',
        'dupnames', but not 'source') from node or dictionary `dict`.
        """
        if isinstance(dict, Node):
            dict = dict.attributes
        for att in ('ids', 'classes', 'names', 'dupnames'):
            for value in dict.get(att, []):
                if not value in self[att]:
                    self[att].append(value)

    def clear(self):
        self.children = []

    def replace(self, old, new):
        """Replace one child `Node` with another child or children."""
        index = self.index(old)
        if isinstance(new, Node):
            self.setup_child(new)
            self[index] = new
        elif new is not None:
            self[index:index+1] = new

    def replace_self(self, new):
        """
        Replace `self` node with `new`, where `new` is a node or a
        list of nodes.
        """
        update = new
        if not isinstance(new, Node):
            # `new` is a list; update first child.
            try:
                update = new[0]
            except IndexError:
                update = None
        if isinstance(update, Element):
            update.update_basic_atts(self)
        else:
            # `update` is a Text node or `new` is an empty list.
            # Assert that we aren't losing any attributes.
            for att in ('ids', 'names', 'classes', 'dupnames'):
                assert not self[att], \
                       'Losing "%s" attribute: %s' % (att, self[att])
        self.parent.replace(self, new)

    def first_child_matching_class(self, childclass, start=0, end=sys.maxint):
        """
        Return the index of the first child that is an instance of
        `childclass` (subclass instances match as well), or None.

        Parameters:

        - `childclass`: A `Node` subclass to search for, or a tuple of `Node`
          classes. If a tuple, any of the classes may match.
        - `start`: Initial index to check.
        - `end`: Initial index to *not* check.
        """
        if not isinstance(childclass, tuple):
            childclass = (childclass,)
        for index in range(start, min(len(self), end)):
            for c in childclass:
                if isinstance(self[index], c):
                    return index
        return None

    def first_child_not_matching_class(self, childclass, start=0,
                                       end=sys.maxint):
        """
        Return the index of the first child whose class does *not* match,
        or None.

        Parameters:

        - `childclass`: A `Node` subclass to skip, or a tuple of `Node`
          classes. If a tuple, none of the classes may match.
        - `start`: Initial index to check.
        - `end`: Initial index to *not* check.
        """
        if not isinstance(childclass, tuple):
            childclass = (childclass,)
        for index in range(start, min(len(self), end)):
            for c in childclass:
                if isinstance(self.children[index], c):
                    break
            else:
                # No class matched this child.
                return index
        return None

    def pformat(self, indent='    ', level=0):
        return ''.join(['%s%s\n' % (indent * level, self.starttag())] +
                       [child.pformat(indent, level+1)
                        for child in self.children])

    def copy(self):
        """Return a childless copy with the same rawsource and attributes."""
        return self.__class__(rawsource=self.rawsource, **self.attributes)

    def deepcopy(self):
        copy = self.copy()
        copy.extend([child.deepcopy() for child in self.children])
        return copy

    def set_class(self, name):
        """Add a new class to the "classes" attribute."""
        warnings.warn('docutils.nodes.Element.set_class deprecated; '
                      "append to Element['classes'] list attribute directly",
                      DeprecationWarning, stacklevel=2)
        assert ' ' not in name
        self['classes'].append(name.lower())

    def note_referenced_by(self, name=None, id=None):
        """Note that this Element has been referenced by its name
        `name` or id `id`."""
        self.referenced = 1
        # Element.expect_referenced_by_* dictionaries map names or ids
        # to nodes whose ``referenced`` attribute is set to true as
        # soon as this node is referenced by the given name or id.
        # Needed for target propagation.
        by_name = getattr(self, 'expect_referenced_by_name', {}).get(name)
        by_id = getattr(self, 'expect_referenced_by_id', {}).get(id)
        if by_name:
            assert name is not None
            by_name.referenced = 1
        if by_id:
            assert id is not None
            by_id.referenced = 1
class TextElement(Element):

    """
    An element which directly contains text.

    Its children are all `Text` or `Inline` subclass nodes.  To find out
    whether an element's context is inline, check whether its immediate
    parent is a `TextElement` instance (including subclasses).  This is
    handy for nodes like `image` that can appear both inline and as
    standalone body elements.

    If passing children to `__init__()`, make sure to set `text` to
    ``''`` or some other suitable value.
    """

    child_text_separator = ''
    """Separator for child nodes, used by `astext()` method."""

    def __init__(self, rawsource='', text='', *children, **attributes):
        if text == '':
            # No leading text: the children are passed through unchanged.
            Element.__init__(self, rawsource, *children, **attributes)
        else:
            # Non-empty `text` becomes the first child, as a `Text` node.
            leading = Text(text)
            Element.__init__(self, rawsource, leading, *children,
                             **attributes)
class FixedTextElement(TextElement):

    """An element which directly contains preformatted text."""

    def __init__(self, rawsource='', text='', *children, **attributes):
        TextElement.__init__(self, rawsource, text, *children, **attributes)
        # Always set, overriding any 'xml:space' passed in `attributes`.
        self.attributes.update({'xml:space': 'preserve'})
811 # ========
812 # Mixins
813 # ========
class Resolvable:

    """Mixin providing the `resolved` flag, initially 0."""

    resolved = 0


class BackLinkable:

    """Mixin for elements that record back-references."""

    def add_backref(self, refid):
        """Append `refid` to this element's 'backrefs' list attribute."""
        backrefs = self['backrefs']
        backrefs.append(refid)


# ====================
#  Element Categories
# ====================

class Root:
    """Marker base class for the `Root` element category."""

class Titular:
    """Marker base class for the `Titular` element category."""

class PreBibliographic:
    """Category of Node which may occur before Bibliographic Nodes."""

class Bibliographic:
    """Marker base class for the `Bibliographic` element category."""

class Decorative(PreBibliographic):
    """Marker base class for the `Decorative` element category."""

class Structural:
    """Marker base class for the `Structural` element category."""

class Body:
    """Marker base class for the `Body` element category."""

class General(Body):
    """Marker base class for the `General` body element category."""

class Sequential(Body):
    """List-like elements."""

class Admonition(Body):
    """Marker base class for the `Admonition` body element category."""

class Special(Body):
    """Special internal body elements."""

class Invisible(PreBibliographic):
    """Internal elements that don't appear in output."""

class Part:
    """Marker base class for the `Part` element category."""

class Inline:
    """Marker base class for the `Inline` element category."""

class Referential(Resolvable):
    """Marker base class for the `Referential` element category."""


class Targetable(Resolvable):

    """Mixin providing the `referenced` flag and the reference name."""

    referenced = 0

    indirect_reference_name = None
    """Holds the whitespace_normalized_name (contains mixed case) of a target.
    Required for MoinMoin/reST compatibility."""


class Labeled:
    """Contains a `label` as its first element."""
878 # ==============
879 # Root Element
880 # ==============
882 class document(Root, Structural, Element):
885 The document root element.
887 Do not instantiate this class directly; use
888 `docutils.utils.new_document()` instead.
def __init__(self, settings, reporter, *args, **kwargs):
    # Remaining arguments are handed to `Element.__init__()` unchanged.
    Element.__init__(self, *args, **kwargs)

    self.current_source = None
    """Path to or description of the input source being processed."""

    self.current_line = None
    """Line number (1-based) of `current_source`."""

    self.settings = settings
    """Runtime settings data record."""

    self.reporter = reporter
    """System message generator."""

    self.indirect_targets = []
    """List of indirect target nodes."""

    self.substitution_defs = {}
    """Mapping of substitution names to substitution_definition nodes."""

    self.substitution_names = {}
    """Mapping of case-normalized substitution names to case-sensitive
    names."""

    self.refnames = {}
    """Mapping of names to lists of referencing nodes."""

    self.refids = {}
    """Mapping of ids to lists of referencing nodes."""

    self.nameids = {}
    """Mapping of names to unique id's."""

    self.nametypes = {}
    """Mapping of names to hyperlink type (boolean: True => explicit,
    False => implicit)."""

    self.ids = {}
    """Mapping of ids to nodes."""

    self.footnote_refs = {}
    """Mapping of footnote labels to lists of footnote_reference nodes."""

    self.citation_refs = {}
    """Mapping of citation labels to lists of citation_reference nodes."""

    self.autofootnotes = []
    """List of auto-numbered footnote nodes."""

    self.autofootnote_refs = []
    """List of auto-numbered footnote_reference nodes."""

    self.symbol_footnotes = []
    """List of symbol footnote nodes."""

    self.symbol_footnote_refs = []
    """List of symbol footnote_reference nodes."""

    self.footnotes = []
    """List of manually-numbered footnote nodes."""

    self.citations = []
    """List of citation nodes."""

    self.autofootnote_start = 1
    """Initial auto-numbered footnote number."""

    self.symbol_footnote_start = 0
    """Initial symbol footnote symbol index."""

    self.id_start = 1
    """Initial ID number."""

    self.parse_messages = []
    """System messages generated while parsing."""

    self.transform_messages = []
    """System messages generated while applying transforms."""

    # Imported here rather than at module level.
    import docutils.transforms
    self.transformer = docutils.transforms.Transformer(self)
    """Storage for transforms to be applied to this document."""

    self.decoration = None
    """Document's `decoration` node."""

    # The root node is its own document.
    self.document = self
def __getstate__(self):
    """
    Return a copy of the instance dict with the unpicklable
    `reporter` and `transformer` references replaced by None.
    """
    state = dict(self.__dict__)
    state.update(reporter=None, transformer=None)
    return state
def asdom(self, dom=None):
    """Return a DOM representation of this document.

    `dom` is a DOM implementation module providing ``Document()``;
    ``xml.dom.minidom`` is used when it is None.
    """
    if dom is None:
        from xml.dom import minidom
        dom = minidom
    domroot = dom.Document()
    root_element = self._dom_node(domroot)
    domroot.appendChild(root_element)
    return domroot
def set_id(self, node, msgnode=None):
    """Make sure `node` has an ID, register it in `self.ids`, return it.

    If `node` already has IDs, a "Duplicate ID" system message of level
    "severe" is generated for every ID that `self.ids` maps to a
    different node; the message is added to `msgnode` if that is not
    None.  Otherwise an ID is derived from the first of the node's names
    that yields an unused ID, or else generated from the ID prefixes in
    the settings and the `self.id_start` counter.

    Only the returned ID (the last of several existing ones) is entered
    into `self.ids`.
    """
    for node_id in node['ids']:
        if node_id in self.ids and self.ids[node_id] is not node:
            msg = self.reporter.severe('Duplicate ID: "%s".' % node_id)
            # Identity test: "!=" would be dispatched to the comparison
            # operators of `msgnode`.
            if msgnode is not None:
                msgnode += msg
    if not node['ids']:
        for name in node['names']:
            node_id = self.settings.id_prefix + make_id(name)
            if node_id and node_id not in self.ids:
                break
        else:
            # No name produced a usable ID: generate one from the counter.
            node_id = ''
            while not node_id or node_id in self.ids:
                node_id = (self.settings.id_prefix +
                           self.settings.auto_id_prefix + str(self.id_start))
                self.id_start += 1
        node['ids'].append(node_id)
    self.ids[node_id] = node
    return node_id
def set_name_id_map(self, node, id, msgnode=None, explicit=None):
    """
    `self.nameids` maps names to IDs, while `self.nametypes` maps names to
    booleans representing hyperlink type (True==explicit,
    False==implicit).  This method updates the mappings.

    The following state transition table shows how `self.nameids` ("ids")
    and `self.nametypes` ("types") change with new input (a call to this
    method), and what actions are performed ("implicit"-type system
    messages are INFO/1, and "explicit"-type system messages are ERROR/3):

    ====  =====  ========  ========  =======  ====  =====  =====
     Old State    Input          Action        New State   Notes
    -----------  --------  -----------------  -----------  -----
    ids   types  new type  sys.msg.  dupname  ids   types
    ====  =====  ========  ========  =======  ====  =====  =====
    -     -      explicit  -         -        new   True
    -     -      implicit  -         -        new   False
    None  False  explicit  -         -        new   True
    old   False  explicit  implicit  old      new   True
    None  True   explicit  explicit  new      None  True
    old   True   explicit  explicit  new,old  None  True   [#]_
    None  False  implicit  implicit  new      None  False
    old   False  implicit  implicit  new,old  None  False
    None  True   implicit  implicit  new      None  True
    old   True   implicit  implicit  new      old   True
    ====  =====  ========  ========  =======  ====  =====  =====

    .. [#] Do not clear the name-to-id map or invalidate the old target if
       both old and new targets are external and refer to identical URIs.
       The new target is invalidated regardless.
    """
    for name in node['names']:
        if name not in self.nameids:
            # First occurrence of this name: simply record it.
            self.nameids[name] = id
            self.nametypes[name] = explicit
            continue
        self.set_duplicate_name_id(node, id, name, msgnode, explicit)
def set_duplicate_name_id(self, node, id, name, msgnode, explicit):
    """Handle a `name` that is already present in `self.nameids`.

    Updates `self.nameids` and `self.nametypes` for `name`, invalidates
    the old and/or the new target with `dupname()`, and generates a
    "Duplicate explicit target name" or "Duplicate implicit target name"
    system message, which is added to `msgnode` unless that is None.

    Parameters:

    - `node`: the new target node carrying `name`.
    - `id`: the ID of `node`; used as the back-reference of the messages.
    - `name`: the duplicate name.
    - `msgnode`: node to add system messages to, or None.
    - `explicit`: true if `node` is an explicit target.
    """
    old_id = self.nameids[name]
    old_explicit = self.nametypes[name]
    self.nametypes[name] = old_explicit or explicit
    if explicit:
        if old_explicit:
            level = 2
            if old_id is not None:
                old_node = self.ids[old_id]
                if 'refuri' in node:
                    refuri = node['refuri']
                    if old_node['names'] \
                           and 'refuri' in old_node \
                           and old_node['refuri'] == refuri:
                        level = 1   # just inform if refuri's identical
                if level > 1:
                    dupname(old_node, name)
                    self.nameids[name] = None
            msg = self.reporter.system_message(
                level, 'Duplicate explicit target name: "%s".' % name,
                backrefs=[id], base_node=node)
            # Identity test: "!=" would be dispatched to the comparison
            # operators of `msgnode`.
            if msgnode is not None:
                msgnode += msg
            dupname(node, name)
        else:
            # The new explicit target takes over from the implicit one.
            self.nameids[name] = id
            if old_id is not None:
                old_node = self.ids[old_id]
                dupname(old_node, name)
    else:
        if old_id is not None and not old_explicit:
            self.nameids[name] = None
            old_node = self.ids[old_id]
            dupname(old_node, name)
        dupname(node, name)
    if not explicit or (not old_explicit and old_id is not None):
        msg = self.reporter.info(
            'Duplicate implicit target name: "%s".' % name,
            backrefs=[id], base_node=node)
        if msgnode is not None:
            msgnode += msg
def has_name(self, name):
    """Return true if `name` is a key of the `self.nameids` mapping."""
    known_names = self.nameids
    return name in known_names
1102 # "note" here is an imperative verb: "take note of".
def note_implicit_target(self, target, msgnode=None):
    """
    Give `target` an ID (`set_id()`) and register its names through
    `set_name_id_map()` with ``explicit=None``.
    """
    target_id = self.set_id(target, msgnode)
    self.set_name_id_map(target, target_id, msgnode, explicit=None)
def note_explicit_target(self, target, msgnode=None):
    """
    Give `target` an ID (`set_id()`) and register its names through
    `set_name_id_map()` with ``explicit=1``.
    """
    target_id = self.set_id(target, msgnode)
    self.set_name_id_map(target, target_id, msgnode, explicit=1)
def note_refname(self, node):
    """Append `node` to the `self.refnames` list for its "refname"."""
    refname = node['refname']
    self.refnames.setdefault(refname, []).append(node)
def note_refid(self, node):
    """Append `node` to the `self.refids` list for its "refid"."""
    refid = node['refid']
    self.refids.setdefault(refid, []).append(node)
def note_indirect_target(self, target):
    """
    Append `target` to `self.indirect_targets`; if it has any "names",
    also pass it to `note_refname()`.
    """
    self.indirect_targets.append(target)
    if not target['names']:
        return
    self.note_refname(target)
def note_anonymous_target(self, target):
    """Give `target` an ID via `set_id()`; nothing else is recorded."""
    self.set_id(target)
def note_autofootnote(self, footnote):
    """Give `footnote` an ID, then append it to `self.autofootnotes`."""
    self.set_id(footnote)
    collected = self.autofootnotes
    collected.append(footnote)
def note_autofootnote_ref(self, ref):
    """Give `ref` an ID, then append it to `self.autofootnote_refs`."""
    self.set_id(ref)
    collected = self.autofootnote_refs
    collected.append(ref)
def note_symbol_footnote(self, footnote):
    """Give `footnote` an ID, then append it to `self.symbol_footnotes`."""
    self.set_id(footnote)
    collected = self.symbol_footnotes
    collected.append(footnote)
def note_symbol_footnote_ref(self, ref):
    """Give `ref` an ID, then append it to `self.symbol_footnote_refs`."""
    self.set_id(ref)
    collected = self.symbol_footnote_refs
    collected.append(ref)
def note_footnote(self, footnote):
    """Give `footnote` an ID, then append it to `self.footnotes`."""
    self.set_id(footnote)
    collected = self.footnotes
    collected.append(footnote)
def note_footnote_ref(self, ref):
    """
    Give `ref` an ID, file it in `self.footnote_refs` under its
    "refname", and pass it to `note_refname()`.
    """
    self.set_id(ref)
    refname = ref['refname']
    self.footnote_refs.setdefault(refname, []).append(ref)
    self.note_refname(ref)
def note_citation(self, citation):
    """Append `citation` to `self.citations`."""
    collected = self.citations
    collected.append(citation)
def note_citation_ref(self, ref):
    """
    Give `ref` an ID, file it in `self.citation_refs` under its
    "refname", and pass it to `note_refname()`.
    """
    self.set_id(ref)
    refname = ref['refname']
    self.citation_refs.setdefault(refname, []).append(ref)
    self.note_refname(ref)
def note_substitution_def(self, subdef, def_name, msgnode=None):
    """
    Record `subdef` as the substitution definition for `def_name`.

    The name is whitespace-normalized before use.  If a definition with
    that name already exists, an error is reported (and added to `msgnode`
    if that is not None) and the name is moved to "dupnames" on the old
    node; the new definition then replaces the old one.
    """
    name = whitespace_normalize_name(def_name)
    if name in self.substitution_defs:
        msg = self.reporter.error(
              'Duplicate substitution definition name: "%s".' % name,
              base_node=subdef)
        # Identity test: never dispatch to the node's own comparison.
        if msgnode is not None:
            msgnode += msg
        oldnode = self.substitution_defs[name]
        dupname(oldnode, name)
    # keep only the last definition:
    self.substitution_defs[name] = subdef
    # case-insensitive mapping:
    self.substitution_names[fully_normalize_name(name)] = name
def note_substitution_ref(self, subref, refname):
    """Store the whitespace-normalized `refname` as ``subref['refname']``."""
    normalized = whitespace_normalize_name(refname)
    subref['refname'] = normalized
def note_pending(self, pending, priority=None):
    """Hand `pending` and `priority` to ``self.transformer.add_pending()``."""
    transformer = self.transformer
    transformer.add_pending(pending, priority)
def note_parse_message(self, message):
    """Append `message` to `self.parse_messages`."""
    collected = self.parse_messages
    collected.append(message)
def note_transform_message(self, message):
    """Append `message` to `self.transform_messages`."""
    collected = self.transform_messages
    collected.append(message)
def note_source(self, source, offset):
    """
    Remember the current input position.

    `source` is stored as `self.current_source`.  `self.current_line` is
    set to ``offset + 1``, or to None if `offset` is None.
    """
    self.current_source = source
    line = offset
    if line is not None:
        line = line + 1
    self.current_line = line
def copy(self):
    """
    Return a new instance of this node's class, constructed from
    `self.settings`, `self.reporter` and the entries of `self.attributes`.
    """
    node_class = self.__class__
    return node_class(self.settings, self.reporter, **self.attributes)
def get_decoration(self):
    """
    Return `self.decoration`, creating it first if it is not set.

    A new `decoration` node is inserted before the first child that is not
    a `Titular` node, or appended if there is no such child.
    """
    if self.decoration:
        return self.decoration
    self.decoration = decoration()
    position = self.first_child_not_matching_class(Titular)
    if position is not None:
        self.insert(position, self.decoration)
    else:
        self.append(self.decoration)
    return self.decoration
1207 # ================
1208 # Title Elements
1209 # ================
# These classes add nothing of their own; all behaviour comes from the
# listed base classes.

class title(Titular, PreBibliographic, TextElement):
    pass

class subtitle(Titular, PreBibliographic, TextElement):
    pass

class rubric(Titular, TextElement):
    pass
1216 # ========================
1217 # Bibliographic Elements
1218 # ========================
# These classes add nothing of their own; all behaviour comes from the
# listed base classes.

class docinfo(Bibliographic, Element):
    pass

class author(Bibliographic, TextElement):
    pass

class authors(Bibliographic, Element):
    pass

class organization(Bibliographic, TextElement):
    pass

class address(Bibliographic, FixedTextElement):
    pass

class contact(Bibliographic, TextElement):
    pass

class version(Bibliographic, TextElement):
    pass

class revision(Bibliographic, TextElement):
    pass

class status(Bibliographic, TextElement):
    pass

class date(Bibliographic, TextElement):
    pass

class copyright(Bibliographic, TextElement):
    pass
1233 # =====================
1234 # Decorative Elements
1235 # =====================
class decoration(Decorative, Element):

    def get_header(self):
        """
        Return the first child, inserting a new `header` at position 0
        first unless the first child already is a `header`.
        """
        children = self.children
        if not (len(children) and isinstance(children[0], header)):
            self.insert(0, header())
        return self.children[0]

    def get_footer(self):
        """
        Return the last child, appending a new `footer` first unless the
        last child already is a `footer`.
        """
        children = self.children
        if not (len(children) and isinstance(children[-1], footer)):
            self.append(footer())
        return self.children[-1]


class header(Decorative, Element):
    pass

class footer(Decorative, Element):
    pass
1254 # =====================
1255 # Structural Elements
1256 # =====================
class section(Structural, Element):
    pass


class topic(Structural, Element):

    """
    A topic is a terminal, "leaf" mini-section, comparable to a block quote
    with a title or to a textual figure.  It is just like a section, except
    that it has no subsections and need not conform to section placement
    rules.

    Topics may occur wherever body elements (list, table, etc.) may occur,
    but only at the top level of a section or document.  They cannot nest
    inside topics, sidebars, or body elements; a topic inside a table, list,
    block quote, etc. is not possible.
    """


class sidebar(Structural, Element):

    """
    A sidebar is like a miniature, parallel document occurring inside
    another document and providing related or reference material.  It is
    typically offset by a border and "floats" to the side of the page; the
    document's main text may flow around it.  Sidebars can also be likened
    to super-footnotes; their content is outside of the flow of the
    document's main text.

    Sidebars may occur wherever body elements (list, table, etc.) may
    occur, but only at the top level of a section or document.  They cannot
    nest inside sidebars, topics, or body elements; a sidebar inside a
    table, list, block quote, etc. is not possible.
    """


class transition(Structural, Element):
    pass
1295 # ===============
1296 # Body Elements
1297 # ===============
# These classes add nothing of their own; all behaviour comes from the
# listed base classes.

class paragraph(General, TextElement):
    pass

class compound(General, Element):
    pass

class container(General, Element):
    pass

class bullet_list(Sequential, Element):
    pass

class enumerated_list(Sequential, Element):
    pass

class list_item(Part, Element):
    pass

class definition_list(Sequential, Element):
    pass

class definition_list_item(Part, Element):
    pass

class term(Part, TextElement):
    pass

class classifier(Part, TextElement):
    pass

class definition(Part, Element):
    pass

class field_list(Sequential, Element):
    pass

class field(Part, Element):
    pass

class field_name(Part, TextElement):
    pass

class field_body(Part, Element):
    pass
class option(Part, Element):

    # Overrides the inherited class attribute with an empty string.
    child_text_separator = ''


class option_argument(Part, TextElement):

    def astext(self):
        """
        Return the "delimiter" attribute (a single space if it is not set)
        followed by the text from `TextElement.astext()`.
        """
        delimiter = self.get('delimiter', ' ')
        text = TextElement.astext(self)
        return delimiter + text


class option_group(Part, Element):

    # Overrides the inherited class attribute with comma + space.
    child_text_separator = ', '


class option_list(Sequential, Element):
    pass
# Element class whose body only overrides the class attribute
# `child_text_separator`.
# NOTE(review): the value is a whitespace-only string; confirm its exact
# width against the repository file before retyping it.
1335 class option_list_item(Part, Element):
1337 child_text_separator = ' '
# The classes below add nothing of their own, except `line`, which
# defines one class attribute.

class option_string(Part, TextElement):
    pass

class description(Part, Element):
    pass

class literal_block(General, FixedTextElement):
    pass

class doctest_block(General, FixedTextElement):
    pass

class math_block(General, FixedTextElement):
    pass

class line_block(General, Element):
    pass


class line(Part, TextElement):

    # Class-level default; None until something assigns a value.
    indent = None
# These classes add nothing of their own; all behaviour comes from the
# listed base classes.

class block_quote(General, Element):
    pass

class attribution(Part, TextElement):
    pass

class attention(Admonition, Element):
    pass

class caution(Admonition, Element):
    pass

class danger(Admonition, Element):
    pass

class error(Admonition, Element):
    pass

class important(Admonition, Element):
    pass

class note(Admonition, Element):
    pass

class tip(Admonition, Element):
    pass

class hint(Admonition, Element):
    pass

class warning(Admonition, Element):
    pass

class admonition(Admonition, Element):
    pass

class comment(Special, Invisible, FixedTextElement):
    pass

class substitution_definition(Special, Invisible, TextElement):
    pass

class target(Special, Invisible, Inline, TextElement, Targetable):
    pass

class footnote(General, BackLinkable, Element, Labeled, Targetable):
    pass

class citation(General, BackLinkable, Element, Labeled, Targetable):
    pass

class label(Part, TextElement):
    pass

class figure(General, Element):
    pass

class caption(Part, TextElement):
    pass

class legend(Part, Element):
    pass

class table(General, Element):
    pass

class tgroup(Part, Element):
    pass

class colspec(Part, Element):
    pass

class thead(Part, Element):
    pass

class tbody(Part, Element):
    pass

class row(Part, Element):
    pass

class entry(Part, Element):
    pass
class system_message(Special, BackLinkable, PreBibliographic, Element):

    """
    System message element.

    Do not instantiate this class directly; use
    ``document.reporter.info/warning/error/severe()`` instead.
    """

    def __init__(self, message=None, *children, **attributes):
        """
        Initialize the element.

        If `message` is true, it is wrapped in a `paragraph` which becomes
        the first child, ahead of `children`.  `attributes` are passed on
        to `Element.__init__()`.
        """
        if message:
            p = paragraph('', message)
            children = (p,) + children
        try:
            Element.__init__(self, '', *children, **attributes)
        except:
            # Diagnostic aid only; the exception is re-raised unchanged.
            # Written to stderr so that it cannot end up in document output
            # that is being written to stdout.
            sys.stderr.write('system_message: children=%r\n' % (children,))
            raise

    def astext(self):
        """
        Return "<source>:<line>: (<type>/<level>) <text>", where <line> is
        empty if the "line" attribute is not set.
        """
        line = self.get('line', '')
        return u'%s:%s: (%s/%s) %s' % (self['source'], line, self['type'],
                                       self['level'], Element.astext(self))
# `pending`: an Element subclass that stores a transform class and a
# details dictionary as instance attributes (see __init__ below).
1408 class pending(Special, Invisible, Element):
1411 The "pending" element is used to encapsulate a pending operation: the
1412 operation (transform), the point at which to apply it, and any data it
1413 requires. Only the pending operation's location within the document is
1414 stored in the public document tree (by the "pending" object itself); the
1415 operation and its data are stored in the "pending" object's internal
1416 instance attributes.
1418 For example, say you want a table of contents in your reStructuredText
1419 document. The easiest way to specify where to put it is from within the
1420 document, with a directive::
1422 .. contents::
1424 But the "contents" directive can't do its work until the entire document
1425 has been parsed and possibly transformed to some extent. So the directive
1426 code leaves a placeholder behind that will trigger the second phase of its
1427 processing, something like this::
1429 <pending ...public attributes...> + internal attributes
1431 Use `document.note_pending()` so that the
1432 `docutils.transforms.Transformer` stage of processing can run all pending
1433 transforms.
# __init__: `rawsource`, `children` and `attributes` go to
# Element.__init__(); `transform` and `details` are kept on the instance.
1436 def __init__(self, transform, details=None,
1437 rawsource='', *children, **attributes):
1438 Element.__init__(self, rawsource, *children, **attributes)
1440 self.transform = transform
1441 """The `docutils.transforms.Transform` class implementing the pending
1442 operation."""
# A false `details` value (None, empty dict) is replaced by a new dict.
1444 self.details = details or {}
1445 """Detail data (dictionary) required by the pending operation."""
# pformat: the result of Element.pformat() followed by one line per entry
# of `internals`, each prefixed with `indent * level`.
1447 def pformat(self, indent=' ', level=0):
1448 internals = [
1449 '.. internal attributes:',
1450 ' .transform: %s.%s' % (self.transform.__module__,
1451 self.transform.__name__),
1452 ' .details:']
# NOTE(review): calling .sort() on the result requires items() to return a
# list (Python 2 semantics).
1453 details = self.details.items()
1454 details.sort()
1455 for key, value in details:
# A Node value is shown through its own pformat(), one output line each.
1456 if isinstance(value, Node):
1457 internals.append('%7s%s:' % ('', key))
1458 internals.extend(['%9s%s' % ('', line)
1459 for line in value.pformat().splitlines()])
# A non-empty list is treated as a list of Nodes if its first item is one.
1460 elif value and isinstance(value, list) \
1461 and isinstance(value[0], Node):
1462 internals.append('%7s%s:' % ('', key))
1463 for v in value:
1464 internals.extend(['%9s%s' % ('', line)
1465 for line in v.pformat().splitlines()])
# Anything else is shown with its repr().
1466 else:
1467 internals.append('%7s%s: %r' % ('', key, value))
1468 return (Element.pformat(self, indent, level)
1469 + ''.join([(' %s%s\n' % (indent * level, line))
1470 for line in internals]))
# copy: new instance of the same class; `self.details` is passed as-is (not
# copied) and no children are passed.
1472 def copy(self):
1473 return self.__class__(self.transform, self.details, self.rawsource,
1474 **self.attributes)
class raw(Special, Inline, PreBibliographic, FixedTextElement):

    """
    Raw data that is to be passed untouched to the Writer.
    """
1486 # =================
1487 # Inline Elements
1488 # =================
# The classes below add nothing of their own, except `image`, which
# overrides `astext()`.

class emphasis(Inline, TextElement):
    pass

class strong(Inline, TextElement):
    pass

class literal(Inline, TextElement):
    pass

class reference(General, Inline, Referential, TextElement):
    pass

class footnote_reference(Inline, Referential, TextElement):
    pass

class citation_reference(Inline, Referential, TextElement):
    pass

class substitution_reference(Inline, TextElement):
    pass

class title_reference(Inline, TextElement):
    pass

class abbreviation(Inline, TextElement):
    pass

class acronym(Inline, TextElement):
    pass

class superscript(Inline, TextElement):
    pass

class subscript(Inline, TextElement):
    pass

class math(Inline, TextElement):
    pass


class image(General, Inline, Element):

    def astext(self):
        """Return the "alt" attribute, or an empty string if it is not set."""
        alternative_text = self.get('alt', '')
        return alternative_text


class inline(Inline, TextElement):
    pass

class problematic(Inline, TextElement):
    pass

class generated(Inline, TextElement):
    pass
1516 # ========================================
1517 # Auxiliary Classes, Functions, and Data
1518 # ========================================
node_class_names = (
    'Text '
    'abbreviation acronym address admonition attention attribution author '
    'authors '
    'block_quote bullet_list '
    'caption caution citation citation_reference classifier colspec comment '
    'compound contact container copyright '
    'danger date decoration definition definition_list definition_list_item '
    'description docinfo doctest_block document '
    'emphasis entry enumerated_list error '
    'field field_body field_list field_name figure footer '
    'footnote footnote_reference '
    'generated '
    'header hint '
    'image important inline '
    'label legend line line_block list_item literal literal_block '
    'math math_block '
    'note '
    'option option_argument option_group option_list option_list_item '
    'option_string organization '
    'paragraph pending problematic '
    'raw reference revision row rubric '
    'section sidebar status strong subscript substitution_definition '
    'substitution_reference subtitle superscript system_message '
    'table target tbody term tgroup thead tip title title_reference topic '
    'transition '
    'version '
    'warning').split()
"""A list of names of all concrete Node subclasses."""
class NodeVisitor:

    """
    Abstract superclass implementing the "Visitor" pattern [GoF95]_ for
    document tree traversals.

    There is one pair of methods per node class; by default they do
    nothing, so override individual methods for specific and useful
    behaviour.  `Node.walk()` calls `dispatch_visit()` upon entering a
    node.  `Node.walkabout()` additionally calls `dispatch_departure()`
    before exiting a node.

    The dispatch methods call "``visit_`` + node class name" or
    "``depart_`` + node class name", resp.

    Use this base class for visitors whose ``visit_...`` & ``depart_...``
    methods should be implemented for *all* node types encountered (such as
    for `docutils.writers.Writer` subclasses).  Unimplemented methods will
    raise exceptions.

    Subclass `SparseNodeVisitor` instead for sparse traversals, where only
    certain node types are of interest.  Subclass `GenericNodeVisitor` when
    (mostly or entirely) uniform processing is desired.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    optional = ()
    """
    Tuple containing node class names (as strings).

    No exception will be raised if writers do not implement visit
    or departure functions for these node classes.

    Used to ensure transitional compatibility with existing 3rd-party writers.
    """

    def __init__(self, document):
        self.document = document

    def dispatch_visit(self, node):
        """
        Call self."``visit_`` + node class name" with `node` as
        parameter.  If the ``visit_...`` method does not exist, call
        self.unknown_visit.
        """
        node_name = node.__class__.__name__
        handler = getattr(self, 'visit_' + node_name, self.unknown_visit)
        self.document.reporter.debug(
            'docutils.nodes.NodeVisitor.dispatch_visit calling %s for %s'
            % (handler.__name__, node_name))
        return handler(node)

    def dispatch_departure(self, node):
        """
        Call self."``depart_`` + node class name" with `node` as
        parameter.  If the ``depart_...`` method does not exist, call
        self.unknown_departure.
        """
        node_name = node.__class__.__name__
        handler = getattr(self, 'depart_' + node_name, self.unknown_departure)
        self.document.reporter.debug(
            'docutils.nodes.NodeVisitor.dispatch_departure calling %s for %s'
            % (handler.__name__, node_name))
        return handler(node)

    def unknown_visit(self, node):
        """
        Called when entering unknown `Node` types.

        Raise an exception unless overridden.  Nothing is raised if the
        ``strict_visitor`` setting is false and the node class name is
        listed in `optional`.
        """
        node_name = node.__class__.__name__
        if (not self.document.settings.strict_visitor
            and node_name in self.optional):
            return
        raise NotImplementedError(
            '%s visiting unknown node type: %s'
            % (self.__class__, node_name))

    def unknown_departure(self, node):
        """
        Called before exiting unknown `Node` types.

        Raise exception unless overridden.  Nothing is raised if the
        ``strict_visitor`` setting is false and the node class name is
        listed in `optional`.
        """
        node_name = node.__class__.__name__
        if (not self.document.settings.strict_visitor
            and node_name in self.optional):
            return
        raise NotImplementedError(
            '%s departing unknown node type: %s'
            % (self.__class__, node_name))
class SparseNodeVisitor(NodeVisitor):

    """
    Base class for sparse traversals, i.e. traversals in which only certain
    node types are of interest.  Subclass `NodeVisitor` instead when
    ``visit_...`` & ``depart_...`` methods should be implemented for *all*
    node types (such as for `docutils.writers.Writer` subclasses).
    """
class GenericNodeVisitor(NodeVisitor):

    """
    Generic "Visitor" abstract superclass, for simple traversals.

    Each ``visit_...`` method calls `default_visit()` unless it is
    overridden, and each ``depart_...`` method (when using
    `Node.walkabout()`) calls `default_departure()`.  Subclasses must
    override `default_visit()` (and `default_departure()`).

    To define a fully generic visitor, override only `default_visit()`
    (and `default_departure()`).  To define a semi-generic visitor, also
    override individual ``visit_...()`` (and ``depart_...()``) methods.

    `NodeVisitor.unknown_visit()` (`NodeVisitor.unknown_departure()`) should
    be overridden for default behavior.
    """

    def default_visit(self, node):
        """Override for generic, uniform traversals."""
        raise NotImplementedError

    def default_departure(self, node):
        """Override for generic, uniform traversals."""
        raise NotImplementedError
def _call_default_visit(self, node):
    """Forward `node` to ``self.default_visit()``; return nothing."""
    self.default_visit(node)
def _call_default_departure(self, node):
    """Forward `node` to ``self.default_departure()``; return nothing."""
    self.default_departure(node)
def _nop(self, node):
    """Do nothing."""
    return None
def _add_node_class_names(names):
    """
    Save typing with dynamic assignments: for every name in `names`, set
    ``visit_<name>`` and ``depart_<name>`` on `GenericNodeVisitor` (calling
    the default methods) and on `SparseNodeVisitor` (doing nothing).
    """
    bindings = (
        (GenericNodeVisitor, _call_default_visit, _call_default_departure),
        (SparseNodeVisitor, _nop, _nop))
    for node_name in names:
        for visitor_class, on_visit, on_departure in bindings:
            setattr(visitor_class, 'visit_' + node_name, on_visit)
            setattr(visitor_class, 'depart_' + node_name, on_departure)

_add_node_class_names(node_class_names)
class TreeCopyVisitor(GenericNodeVisitor):

    """
    Make a complete copy of a tree or branch, including element attributes.
    """

    def __init__(self, document):
        GenericNodeVisitor.__init__(self, document)
        # Saved acting parents, innermost last.
        self.parent_stack = []
        # Acting parent; starts as a plain list that receives the root copy.
        self.parent = []

    def get_tree_copy(self):
        """Return the first item of the acting parent."""
        return self.parent[0]

    def default_visit(self, node):
        """Copy the current node, and make it the new acting parent."""
        duplicate = node.copy()
        self.parent.append(duplicate)
        self.parent_stack.append(self.parent)
        self.parent = duplicate

    def default_departure(self, node):
        """Restore the previous acting parent."""
        self.parent = self.parent_stack.pop()
class TreePruningException(Exception):

    """
    Base class for `NodeVisitor`-related tree pruning exceptions.

    To prune the tree traversed, raise a subclass from within a
    ``visit_...`` or ``depart_...`` method called from a `Node.walk()` or
    `Node.walkabout()` tree traversal.
    """


class SkipChildren(TreePruningException):

    """
    Do not visit any children of the current node.  The current node's
    siblings and ``depart_...`` method are not affected.
    """


class SkipSiblings(TreePruningException):

    """
    Do not visit any more siblings (to the right) of the current node.  The
    current node's children and its ``depart_...`` method are not affected.
    """


class SkipNode(TreePruningException):

    """
    Do not visit the current node's children, and do not call the current
    node's ``depart_...`` method.
    """


class SkipDeparture(TreePruningException):

    """
    Do not call the current node's ``depart_...`` method.  The current node's
    children and siblings are not affected.
    """


class NodeFound(TreePruningException):

    """
    Raise to indicate that the target of a search has been found.  This
    exception must be caught by the client; it is not caught by the
    traversal code.
    """


class StopTraversal(TreePruningException):

    """
    Stop the traversal altogether.  The current node's ``depart_...`` method
    is not affected.  The parent nodes' ``depart_...`` methods are also
    called as usual.  No other nodes are visited.  This is an alternative to
    NodeFound that does not cause exception handling to trickle up to the
    caller.
    """
def make_id(string):
    """
    Convert `string` into an identifier and return it.

    Docutils identifiers will conform to the regular expression
    ``[a-z](-?[a-z0-9]+)*``.  For CSS compatibility, identifiers (the "class"
    and "id" attributes) should have no underscores, colons, or periods.
    Hyphens may be used.

    - The `HTML 4.01 spec`_ defines identifiers based on SGML tokens:

          ID and NAME tokens must begin with a letter ([A-Za-z]) and may be
          followed by any number of letters, digits ([0-9]), hyphens ("-"),
          underscores ("_"), colons (":"), and periods (".").

    - However the `CSS1 spec`_ defines identifiers based on the "name" token,
      a tighter interpretation ("flex" tokenizer notation; "latin1" and
      "escape" 8-bit characters have been replaced with entities)::

          unicode     \\[0-9a-f]{1,4}
          latin1      [&iexcl;-&yuml;]
          escape      {unicode}|\\[ -~&iexcl;-&yuml;]
          nmchar      [-a-z0-9]|{latin1}|{escape}
          name        {nmchar}+

    The CSS1 "nmchar" rule does not include underscores ("_"), colons (":"),
    or periods ("."), therefore "class" and "id" attributes should not contain
    these characters.  They should be replaced with hyphens ("-").  Combined
    with HTML's requirements (the first character must be a letter; no
    "unicode", "latin1", or "escape" characters), this results in the
    ``[a-z](-?[a-z0-9]+)*`` pattern.

    The result may be an empty string, e.g. if `string` contains no ASCII
    letters.

    .. _HTML 4.01 spec: http://www.w3.org/TR/html401
    .. _CSS1 spec: http://www.w3.org/TR/REC-CSS1
    """
    # The name `unicode` exists only in Python 2; fall back to `str` so that
    # the function also works when run unconverted under Python 3.
    try:
        text_type = unicode
    except NameError:
        text_type = str
    id = string.lower()
    if not isinstance(id, text_type):
        id = id.decode()
    # Digraphs first, then single-letter replacements for characters that
    # the NFKD normalization below does not decompose.
    id = id.translate(_non_id_translate_digraphs)
    id = id.translate(_non_id_translate)
    # get rid of non-ascii characters.
    # 'ascii' lowercase to prevent problems with turkish locale.
    id = unicodedata.normalize('NFKD', id).\
         encode('ascii', 'ignore').decode('ascii')
    # shrink runs of whitespace and replace by hyphen
    id = _non_id_chars.sub('-', ' '.join(id.split()))
    # strip leading hyphens/digits and trailing hyphens
    id = _non_id_at_ends.sub('', id)
    return str(id)

_non_id_chars = re.compile('[^a-z0-9]+')
_non_id_at_ends = re.compile('^[-0-9]+|-+$')
_non_id_translate = {
    0x00f8: u'o',       # o with stroke
    0x0111: u'd',       # d with stroke
    0x0127: u'h',       # h with stroke
    0x0131: u'i',       # dotless i
    0x0142: u'l',       # l with stroke
    0x0167: u't',       # t with stroke
    0x0180: u'b',       # b with stroke
    0x0183: u'b',       # b with topbar
    0x0188: u'c',       # c with hook
    0x018c: u'd',       # d with topbar
    0x0192: u'f',       # f with hook
    0x0199: u'k',       # k with hook
    0x019a: u'l',       # l with bar
    0x019e: u'n',       # n with long right leg
    0x01a5: u'p',       # p with hook
    0x01ab: u't',       # t with palatal hook
    0x01ad: u't',       # t with hook
    0x01b4: u'y',       # y with hook
    0x01b6: u'z',       # z with stroke
    0x01e5: u'g',       # g with stroke
    0x0225: u'z',       # z with hook
    0x0234: u'l',       # l with curl
    0x0235: u'n',       # n with curl
    0x0236: u't',       # t with curl
    0x0237: u'j',       # dotless j
    0x023c: u'c',       # c with stroke
    0x023f: u's',       # s with swash tail
    0x0240: u'z',       # z with swash tail
    0x0247: u'e',       # e with stroke
    0x0249: u'j',       # j with stroke
    0x024b: u'q',       # q with hook tail
    0x024d: u'r',       # r with stroke
    0x024f: u'y',       # y with stroke
}
_non_id_translate_digraphs = {
    0x00df: u'sz',      # ligature sz
    0x00e6: u'ae',      # ae
    0x0153: u'oe',      # ligature oe
    0x0238: u'db',      # db digraph
    0x0239: u'qp',      # qp digraph
}
def dupname(node, name):
    """
    Move `name` from ``node['names']`` to ``node['dupnames']`` and mark
    `node` as referenced.
    """
    duplicates = node['dupnames']
    duplicates.append(name)
    active_names = node['names']
    active_names.remove(name)
    # Assume that this method is referenced, even though it isn't; we
    # don't want to throw unnecessary system_messages.
    node.referenced = 1
def fully_normalize_name(name):
    """Return a case- and whitespace-normalized name."""
    words = name.lower().split()
    return ' '.join(words)
def whitespace_normalize_name(name):
    """Return a whitespace-normalized name."""
    words = name.split()
    return ' '.join(words)
def serial_escape(value):
    """Escape string values that are elements of a list, for serialization."""
    # Backslashes first, so that those added for spaces are not doubled.
    escaped = value.replace('\\', r'\\')
    escaped = escaped.replace(' ', r'\ ')
    return escaped
1916 # \f
1918 # Local Variables:
1919 # indent-tabs-mode: nil
1920 # sentence-end-double-space: t
1921 # fill-column: 78
1922 # End: