*** empty log message ***
[docutils.git] / docutils / nodes.py
blobc5ccbadf8a144d0dcd5df0ebf4cd51014c22d34e
1 # Author: David Goodger
2 # Contact: goodger@users.sourceforge.net
3 # Revision: $Revision$
4 # Date: $Date$
5 # Copyright: This module has been placed in the public domain.
7 """
8 Docutils document tree element class library.
10 Classes in CamelCase are abstract base classes or auxiliary classes. The one
11 exception is `Text`, for a text (PCDATA) node; uppercase is used to
12 differentiate from element classes. Classes in lower_case_with_underscores
13 are element classes, matching the XML element generic identifiers in the DTD_.
15 The position of each node (the level at which it can occur) is significant and
16 is represented by abstract base classes (`Root`, `Structural`, `Body`,
17 `Inline`, etc.). Certain transformations will be easier because we can use
18 ``isinstance(node, base_class)`` to determine the position of the node in the
19 hierarchy.
21 .. _DTD: http://docutils.sourceforge.net/spec/docutils.dtd
22 """
24 __docformat__ = 'reStructuredText'
26 import sys
27 import os
28 import re
29 import xml.dom.minidom
30 from types import IntType, SliceType, StringType, UnicodeType, \
31 TupleType, ListType
32 from UserString import UserString
35 # ==============================
36 # Functional Node Base Classes
37 # ==============================
class Node:

    """Abstract base class of nodes in a document tree."""

    parent = None
    """Back-reference to the Node immediately containing this Node."""

    document = None
    """The `document` node at the root of the tree containing this Node."""

    source = None
    """Path or description of the input source which generated this Node."""

    line = None
    """The line number (1-based) of the beginning of this Node in `source`."""

    def __nonzero__(self):
        """
        Node instances are always true, even if they're empty.  A node is more
        than a simple container.  Its boolean "truth" does not depend on
        having one or more subnodes in the doctree.

        Use `len()` to check node length.  Use `None` to represent a boolean
        false value.
        """
        return 1

    def asdom(self, dom=xml.dom.minidom):
        """Return a DOM **fragment** representation of this Node."""
        return self._dom_node(dom.Document())

    def pformat(self, indent='    ', level=0):
        """Return an indented pseudo-XML representation, for test purposes."""
        raise NotImplementedError

    def copy(self):
        """Return a copy of self."""
        raise NotImplementedError

    def setup_child(self, child):
        """
        Make `child` point back at this node.

        The child's `parent` is always set.  If this node already belongs to
        a document, the child inherits it, and any `source`/`line` the child
        does not have yet is taken from the document's current position.
        """
        child.parent = self
        owner = self.document
        if not owner:
            return
        child.document = owner
        if child.source is None:
            child.source = owner.current_source
        if child.line is None:
            child.line = owner.current_line

    def walk(self, visitor):
        """
        Traverse a tree of `Node` objects, calling ``visit_...`` methods of
        `visitor` when entering each node. If there is no
        ``visit_particular_node`` method for a node of type
        ``particular_node``, the ``unknown_visit`` method is called.  (The
        `walkabout()` method is similar, except it also calls ``depart_...``
        methods before exiting each node.)

        This tree traversal supports limited in-place tree
        modifications.  Replacing one node with one or more nodes is
        OK, as is removing an element.  However, if the node removed
        or replaced occurs after the current node, the old node will
        still be traversed, and any new nodes will not.

        Within ``visit_...`` methods (and ``depart_...`` methods for
        `walkabout()`), `TreePruningException` subclasses may be raised
        (`SkipChildren`, `SkipSiblings`, `SkipNode`, `SkipDeparture`).

        Parameter `visitor`: A `NodeVisitor` object, containing a
        ``visit_...`` method for each `Node` subclass encountered.
        """
        visit_name = 'visit_' + self.__class__.__name__
        visit = getattr(visitor, visit_name, visitor.unknown_visit)
        visitor.document.reporter.debug(visit_name,
                                        category='nodes.Node.walk')
        try:
            visit(self)
        except (SkipChildren, SkipNode):
            return
        except SkipDeparture:
            # There is no departure phase in `walk()`; nothing to skip.
            pass
        subnodes = self.get_children()
        try:
            # Iterate over a snapshot so visitors may edit the child list.
            for subnode in subnodes[:]:
                subnode.walk(visitor)
        except SkipSiblings:
            pass

    def walkabout(self, visitor):
        """
        Perform a tree traversal similarly to `Node.walk()` (which see),
        except also call ``depart_...`` methods before exiting each node. If
        there is no ``depart_particular_node`` method for a node of type
        ``particular_node``, the ``unknown_departure`` method is called.

        Parameter `visitor`: A `NodeVisitor` object, containing ``visit_...``
        and ``depart_...`` methods for each `Node` subclass encountered.
        """
        class_name = self.__class__.__name__
        depart_wanted = 1
        visit_name = 'visit_' + class_name
        visit = getattr(visitor, visit_name, visitor.unknown_visit)
        visitor.document.reporter.debug(visit_name,
                                        category='nodes.Node.walkabout')
        try:
            try:
                visit(self)
            except SkipNode:
                return
            except SkipDeparture:
                depart_wanted = 0
            subnodes = self.get_children()
            try:
                # Iterate over a snapshot so visitors may edit the child list.
                for subnode in subnodes[:]:
                    subnode.walkabout(visitor)
            except SkipSiblings:
                pass
        except SkipChildren:
            pass
        if not depart_wanted:
            return
        depart_name = 'depart_' + class_name
        depart = getattr(visitor, depart_name, visitor.unknown_departure)
        visitor.document.reporter.debug(
            depart_name, category='nodes.Node.walkabout')
        depart(self)
class Text(Node, UserString):

    """
    Instances are terminal nodes (leaves) containing text only; no child
    nodes or attributes.  Initialize by passing a string to the constructor.
    Access the text itself with the `astext` method.
    """

    tagname = '#text'

    def __init__(self, data, rawsource=''):
        UserString.__init__(self, data)

        self.rawsource = rawsource
        """The raw text from which this element was constructed."""

    def _abbreviated(self, limit, keep):
        """
        Return ``<tagname: repr>`` for the text.

        If the repr of the full text is longer than `limit` characters, only
        the first `keep` characters of the text (plus ``' ...'``) are shown.
        """
        data = repr(self.data)
        if len(data) > limit:
            data = repr(self.data[:keep] + ' ...')
        return '<%s: %s>' % (self.tagname, data)

    def __repr__(self):
        return self._abbreviated(70, 64)

    def __len__(self):
        return len(self.data)

    def shortrepr(self):
        """Return a shorter form of `__repr__()`, for use in parent reprs."""
        return self._abbreviated(20, 16)

    def _dom_node(self, domroot):
        return domroot.createTextNode(self.data)

    def astext(self):
        """Return the text held by this node."""
        return self.data

    def copy(self):
        """Return a new node of the same class with the same text."""
        # Carry `rawsource` along; the constructor accepts it and a copy
        # should not silently lose it.
        return self.__class__(self.data, rawsource=self.rawsource)

    def pformat(self, indent='    ', level=0):
        """Return the text, one line per line, indented `level` times."""
        prefix = indent * level
        return ''.join([prefix + line + '\n'
                        for line in self.data.splitlines()])

    def get_children(self):
        """Text nodes have no children. Return []."""
        return []
class Element(Node):

    """
    `Element` is the superclass to all specific elements.

    Elements contain attributes and child nodes.  Elements emulate
    dictionaries for attributes, indexing by attribute name (a string).  To
    set the attribute 'att' to 'value', do::

        element['att'] = 'value'

    Elements also emulate lists for child nodes (element nodes and/or text
    nodes), indexing by integer.  To get the first child node, use::

        element[0]

    Elements may be constructed using the ``+=`` operator.  To add one new
    child node to element, do::

        element += node

    This is equivalent to ``element.append(node)``.

    To add a list of multiple child nodes at once, use the same ``+=``
    operator::

        element += [node1, node2]

    This is equivalent to ``element.extend([node1, node2])``.
    """

    tagname = None
    """The element generic identifier. If None, it is set as an instance
    attribute to the name of the class."""

    child_text_separator = '\n\n'
    """Separator for child nodes, used by `astext()` method."""

    def __init__(self, rawsource='', *children, **attributes):
        self.rawsource = rawsource
        """The raw text from which this element was constructed."""

        self.children = []
        """List of child nodes (elements and/or `Text`)."""

        self.extend(children)           # maintain parent info

        self.attributes = {}
        """Dictionary of attribute {name: value}."""

        for att, value in attributes.items():
            self.attributes[att.lower()] = value

        if self.tagname is None:
            self.tagname = self.__class__.__name__

    def _adopt_all(self, nodes):
        """
        Call `setup_child()` on every node in `nodes`; return them as a list.

        The iterable is materialized first so that one-shot iterables are
        not exhausted by the setup loop before they reach `self.children`.
        """
        nodes = list(nodes)
        for node in nodes:
            self.setup_child(node)
        return nodes

    def _dom_node(self, domroot):
        element = domroot.createElement(self.tagname)
        for attribute, value in self.attributes.items():
            if isinstance(value, ListType):
                # List-valued attributes are serialized space-separated.
                value = ' '.join(['%s' % v for v in value])
            element.setAttribute(attribute, '%s' % value)
        for child in self.children:
            element.appendChild(child._dom_node(domroot))
        return element

    def __repr__(self):
        data = ''
        for c in self.children:
            data += c.shortrepr()
            if len(data) > 60:
                data = data[:56] + ' ...'
                break
        if self.hasattr('name'):
            return '<%s "%s": %s>' % (self.__class__.__name__,
                                      self.attributes['name'], data)
        else:
            return '<%s: %s>' % (self.__class__.__name__, data)

    def shortrepr(self):
        if self.hasattr('name'):
            return '<%s "%s"...>' % (self.__class__.__name__,
                                     self.attributes['name'])
        else:
            return '<%s...>' % self.tagname

    def __str__(self):
        return self.__unicode__().encode('raw_unicode_escape')

    def __unicode__(self):
        if self.children:
            # Use unicode(), not str(): str() of a child element yields an
            # encoded byte string, which cannot be safely interpolated into
            # the unicode template below when it holds non-ASCII bytes.
            return u'%s%s%s' % (self.starttag(),
                                ''.join([unicode(c) for c in self.children]),
                                self.endtag())
        else:
            return self.emptytag()

    def starttag(self):
        parts = [self.tagname]
        for name, value in self.attlist():
            if value is None:           # boolean attribute
                parts.append(name)
            elif isinstance(value, ListType):
                values = ['%s' % v for v in value]
                parts.append('%s="%s"' % (name, ' '.join(values)))
            else:
                parts.append('%s="%s"' % (name, value))
        return '<%s>' % ' '.join(parts)

    def endtag(self):
        return '</%s>' % self.tagname

    def emptytag(self):
        return u'<%s/>' % ' '.join([self.tagname] +
                                   ['%s="%s"' % (n, v)
                                    for n, v in self.attlist()])

    def __len__(self):
        return len(self.children)

    def __getitem__(self, key):
        """Return an attribute (string key) or child node(s) (int/slice)."""
        if isinstance(key, UnicodeType) or isinstance(key, StringType):
            return self.attributes[key]
        elif isinstance(key, IntType):
            return self.children[key]
        elif isinstance(key, SliceType):
            assert key.step in (None, 1), 'cannot handle slice with stride'
            return self.children[key.start:key.stop]
        else:
            raise TypeError('element index must be an integer, a slice, or '
                            'an attribute name string')

    def __setitem__(self, key, item):
        """Set an attribute (string key) or child node(s) (int/slice)."""
        if isinstance(key, UnicodeType) or isinstance(key, StringType):
            self.attributes[str(key)] = item
        elif isinstance(key, IntType):
            self.setup_child(item)
            self.children[key] = item
        elif isinstance(key, SliceType):
            assert key.step in (None, 1), 'cannot handle slice with stride'
            self.children[key.start:key.stop] = self._adopt_all(item)
        else:
            raise TypeError('element index must be an integer, a slice, or '
                            'an attribute name string')

    def __delitem__(self, key):
        """Delete an attribute (string key) or child node(s) (int/slice)."""
        if isinstance(key, UnicodeType) or isinstance(key, StringType):
            del self.attributes[key]
        elif isinstance(key, IntType):
            del self.children[key]
        elif isinstance(key, SliceType):
            assert key.step in (None, 1), 'cannot handle slice with stride'
            del self.children[key.start:key.stop]
        else:
            raise TypeError('element index must be an integer, a simple '
                            'slice, or an attribute name string')

    def __add__(self, other):
        return self.children + other

    def __radd__(self, other):
        return other + self.children

    def __iadd__(self, other):
        """Append a node or a list of nodes to `self.children`."""
        if isinstance(other, Node):
            self.setup_child(other)
            self.children.append(other)
        elif other is not None:
            self.children.extend(self._adopt_all(other))
        return self

    def astext(self):
        """Return the children's text joined by `child_text_separator`."""
        return self.child_text_separator.join(
              [child.astext() for child in self.children])

    def attlist(self):
        """Return the attributes as a sorted list of (name, value) pairs."""
        attlist = self.attributes.items()
        attlist.sort()
        return attlist

    def get(self, key, failobj=None):
        return self.attributes.get(key, failobj)

    def hasattr(self, attr):
        return self.attributes.has_key(attr)

    def delattr(self, attr):
        if self.attributes.has_key(attr):
            del self.attributes[attr]

    def setdefault(self, key, failobj=None):
        return self.attributes.setdefault(key, failobj)

    has_key = hasattr

    def append(self, item):
        self.setup_child(item)
        self.children.append(item)

    def extend(self, item):
        self.children.extend(self._adopt_all(item))

    def insert(self, index, item):
        if isinstance(item, Node):
            self.setup_child(item)
            self.children.insert(index, item)
        elif item is not None:
            self[index:index] = item

    def pop(self, i=-1):
        return self.children.pop(i)

    def remove(self, item):
        self.children.remove(item)

    def index(self, item):
        return self.children.index(item)

    def replace(self, old, new):
        """Replace one child `Node` with another child or children."""
        index = self.index(old)
        if isinstance(new, Node):
            # `__setitem__` calls `setup_child()` on the new node.
            self[index] = new
        elif new is not None:
            self[index:index+1] = new

    def first_child_matching_class(self, childclass, start=0, end=sys.maxint):
        """
        Return the index of the first child that is an instance of
        `childclass` (checked with `isinstance`, so instances of subclasses
        match too), or None if there is no such child.

        Parameters:

        - `childclass`: A `Node` subclass to search for, or a tuple of `Node`
          classes. If a tuple, any of the classes may match.
        - `start`: Initial index to check.
        - `end`: Initial index to *not* check.
        """
        if not isinstance(childclass, TupleType):
            childclass = (childclass,)
        for index in range(start, min(len(self), end)):
            for c in childclass:
                if isinstance(self[index], c):
                    return index
        return None

    def first_child_not_matching_class(self, childclass, start=0,
                                       end=sys.maxint):
        """
        Return the index of the first child whose class does *not* match,
        or None if every child in the range matches.

        Parameters:

        - `childclass`: A `Node` subclass to skip, or a tuple of `Node`
          classes. If a tuple, none of the classes may match.
        - `start`: Initial index to check.
        - `end`: Initial index to *not* check.
        """
        if not isinstance(childclass, TupleType):
            childclass = (childclass,)
        for index in range(start, min(len(self), end)):
            match = 0
            for c in childclass:
                if isinstance(self.children[index], c):
                    match = 1
                    break
            if not match:
                return index
        return None

    def pformat(self, indent='    ', level=0):
        return ''.join(['%s%s\n' % (indent * level, self.starttag())] +
                       [child.pformat(indent, level+1)
                        for child in self.children])

    def get_children(self):
        """Return this element's children."""
        return self.children

    def copy(self):
        """Return a childless element of the same class and attributes."""
        return self.__class__(**self.attributes)

    def set_class(self, name):
        """Add a new name to the "class" attribute."""
        self.attributes['class'] = (self.attributes.get('class', '') + ' '
                                    + name.lower()).strip()
class TextElement(Element):

    """
    An element which directly contains text.

    Its children are all `Text` or `TextElement` subclass nodes.  You can
    check whether an element's context is inline simply by checking whether
    its immediate parent is a `TextElement` instance (including subclasses).
    This is handy for nodes like `image` that can appear both inline and as
    standalone body elements.

    If passing children to `__init__()`, make sure to set `text` to
    ``''`` or some other suitable value.
    """

    child_text_separator = ''
    """Separator for child nodes, used by `astext()` method."""

    def __init__(self, rawsource='', text='', *children, **attributes):
        # A non-empty `text` becomes the first child, wrapped in a `Text`.
        if text != '':
            children = (Text(text),) + children
        Element.__init__(self, rawsource, *children, **attributes)
class FixedTextElement(TextElement):

    """An element which directly contains preformatted text."""

    def __init__(self, rawsource='', text='', *children, **attributes):
        TextElement.__init__(self, rawsource, text, *children, **attributes)
        # Always set after construction, so it overrides any value passed
        # in through `attributes`.
        self.attributes['xml:space'] = 'preserve'
546 # ========
547 # Mixins
548 # ========
class Resolvable:

    resolved = 0


class BackLinkable:

    def add_backref(self, refid):
        """Append `refid` to this node's 'backrefs' list, creating it."""
        backrefs = self.setdefault('backrefs', [])
        backrefs.append(refid)


# ====================
#  Element Categories
# ====================

class Root:
    pass


class Titular:
    pass


class PreDecorative:
    """Category of Node which may occur before Decorative Nodes."""


class PreBibliographic(PreDecorative):
    """Category of Node which may occur before Bibliographic Nodes."""


class Bibliographic(PreDecorative):
    pass


class Decorative:
    pass


class Structural:
    pass


class Body:
    pass


class General(Body):
    pass


class Sequential(Body):
    pass


class Admonition(Body):
    pass


class Special(Body):
    """Special internal body elements."""


class Invisible:
    """Internal elements that don't appear in output."""


class Part:
    pass


class Inline:
    pass


class Referential(Resolvable):
    pass


class Targetable(Resolvable):

    referenced = 0

    indirect_reference_name = None
    """Holds the whitespace_normalized_name (contains mixed case) of a target"""


class Labeled:
    """Contains a `label` as its first element."""
612 # ==============
613 # Root Element
614 # ==============
class document(Root, Structural, Element):

    """
    The root element of a document tree.

    Besides being an `Element`, it holds the per-document bookkeeping
    (ID and name maps, reference lists, footnote and citation lists,
    system messages) that the ``note_...`` methods fill in.
    """

    def __init__(self, settings, reporter, *args, **kwargs):
        Element.__init__(self, *args, **kwargs)

        self.current_source = None
        """Path to or description of the input source being processed."""

        self.current_line = None
        """Line number (1-based) of `current_source`."""

        self.settings = settings
        """Runtime settings data record."""

        self.reporter = reporter
        """System message generator."""

        self.external_targets = []
        """List of external target nodes."""

        self.internal_targets = []
        """List of internal target nodes."""

        self.indirect_targets = []
        """List of indirect target nodes."""

        self.substitution_defs = {}
        """Mapping of substitution names to substitution_definition nodes."""

        self.substitution_names = {}
        """Mapping of case-normalized substitution names to case-sensitive
        names."""

        self.refnames = {}
        """Mapping of names to lists of referencing nodes."""

        self.refids = {}
        """Mapping of ids to lists of referencing nodes."""

        self.nameids = {}
        """Mapping of names to unique id's."""

        self.nametypes = {}
        """Mapping of names to hyperlink type (boolean: True => explicit,
        False => implicit)."""

        self.ids = {}
        """Mapping of ids to nodes."""

        self.substitution_refs = {}
        """Mapping of substitution names to lists of substitution_reference
        nodes."""

        self.footnote_refs = {}
        """Mapping of footnote labels to lists of footnote_reference nodes."""

        self.citation_refs = {}
        """Mapping of citation labels to lists of citation_reference nodes."""

        self.anonymous_targets = []
        """List of anonymous target nodes."""

        self.anonymous_refs = []
        """List of anonymous reference nodes."""

        self.autofootnotes = []
        """List of auto-numbered footnote nodes."""

        self.autofootnote_refs = []
        """List of auto-numbered footnote_reference nodes."""

        self.symbol_footnotes = []
        """List of symbol footnote nodes."""

        self.symbol_footnote_refs = []
        """List of symbol footnote_reference nodes."""

        self.footnotes = []
        """List of manually-numbered footnote nodes."""

        self.citations = []
        """List of citation nodes."""

        self.autofootnote_start = 1
        """Initial auto-numbered footnote number."""

        self.symbol_footnote_start = 0
        """Initial symbol footnote symbol index."""

        self.id_start = 1
        """Initial ID number."""

        self.parse_messages = []
        """System messages generated while parsing."""

        self.transform_messages = []
        """System messages generated while applying transforms."""

        import docutils.transforms
        self.transformer = docutils.transforms.Transformer(self)
        """Storage for transforms to be applied to this document."""

        self.document = self

    def asdom(self, dom=xml.dom.minidom):
        """Return a DOM representation of this document."""
        domroot = dom.Document()
        domroot.appendChild(self._dom_node(domroot))
        return domroot

    def set_id(self, node, msgnode=None):
        """
        Register `node` in `self.ids` and return its ID.

        A node that already has an 'id' attribute keeps it; if that ID is
        registered to a different node, a severe system message is generated
        (and added to `msgnode` if given).  Otherwise an ID is derived from
        the node's 'name' with `make_id()`, falling back to ``id<N>`` when
        that is empty or already taken.
        """
        if node.has_key('id'):
            node_id = node['id']
            if self.ids.has_key(node_id) and self.ids[node_id] is not node:
                msg = self.reporter.severe('Duplicate ID: "%s".' % node_id)
                if msgnode is not None:
                    msgnode += msg
        else:
            if node.has_key('name'):
                node_id = make_id(node['name'])
            else:
                node_id = ''
            while not node_id or self.ids.has_key(node_id):
                node_id = 'id%s' % self.id_start
                self.id_start += 1
            node['id'] = node_id
        self.ids[node_id] = node
        return node_id

    def set_name_id_map(self, node, id, msgnode=None, explicit=None):
        """
        `self.nameids` maps names to IDs, while `self.nametypes` maps names to
        booleans representing hyperlink type (True==explicit,
        False==implicit).  This method updates the mappings.

        The following state transition table shows how `self.nameids` ("ids")
        and `self.nametypes` ("types") change with new input (a call to this
        method), and what actions are performed:

        ====  =====  ========  ========  =======  ====  =====  =====
         Old State    Input          Action        New State   Notes
        -----------  --------  -----------------  -----------  -----
        ids   types  new type  sys.msg.  dupname  ids   types
        ====  =====  ========  ========  =======  ====  =====  =====
        --    --     explicit  --        --       new   True
        --    --     implicit  --        --       new   False
        None  False  explicit  --        --       new   True
        old   False  explicit  implicit  old      new   True
        None  True   explicit  explicit  new      None  True
        old   True   explicit  explicit  new,old  None  True   [#]_
        None  False  implicit  implicit  new      None  False
        old   False  implicit  implicit  new,old  None  False
        None  True   implicit  implicit  new      None  True
        old   True   implicit  implicit  new      old   True
        ====  =====  ========  ========  =======  ====  =====  =====

        .. [#] Do not clear the name-to-id map or invalidate the old target if
           both old and new targets are external and refer to identical URIs.
           The new target is invalidated regardless.
        """
        if node.has_key('name'):
            name = node['name']
            if self.nameids.has_key(name):
                self.set_duplicate_name_id(node, id, name, msgnode, explicit)
            else:
                self.nameids[name] = id
                self.nametypes[name] = explicit

    def set_duplicate_name_id(self, node, id, name, msgnode, explicit):
        """
        Handle a `name` that is already present in `self.nameids`.

        Implements the "old state exists" rows of the state transition table
        documented in `set_name_id_map()`.
        """
        old_id = self.nameids[name]
        old_explicit = self.nametypes[name]
        self.nametypes[name] = old_explicit or explicit
        if explicit:
            if old_explicit:
                level = 2
                if old_id is not None:
                    old_node = self.ids[old_id]
                    if node.has_key('refuri'):
                        refuri = node['refuri']
                        if old_node.has_key('name') \
                               and old_node.has_key('refuri') \
                               and old_node['refuri'] == refuri:
                            level = 1   # just inform if refuri's identical
                    if level > 1:
                        dupname(old_node)
                        self.nameids[name] = None
                msg = self.reporter.system_message(
                    level, 'Duplicate explicit target name: "%s".' % name,
                    backrefs=[id], base_node=node)
                if msgnode is not None:
                    msgnode += msg
                dupname(node)
            else:
                self.nameids[name] = id
                if old_id is not None:
                    old_node = self.ids[old_id]
                    dupname(old_node)
        else:
            if old_id is not None and not old_explicit:
                self.nameids[name] = None
                old_node = self.ids[old_id]
                dupname(old_node)
            dupname(node)
        if not explicit or (not old_explicit and old_id is not None):
            msg = self.reporter.info(
                'Duplicate implicit target name: "%s".' % name,
                backrefs=[id], base_node=node)
            if msgnode is not None:
                msgnode += msg

    def has_name(self, name):
        return self.nameids.has_key(name)

    # "note" here is an imperative verb: "take note of".
    def note_implicit_target(self, target, msgnode=None):
        target_id = self.set_id(target, msgnode)
        self.set_name_id_map(target, target_id, msgnode, explicit=None)

    def note_explicit_target(self, target, msgnode=None):
        target_id = self.set_id(target, msgnode)
        self.set_name_id_map(target, target_id, msgnode, explicit=1)

    def note_refname(self, node):
        self.refnames.setdefault(node['refname'], []).append(node)

    def note_refid(self, node):
        self.refids.setdefault(node['refid'], []).append(node)

    def note_external_target(self, target):
        self.external_targets.append(target)

    def note_internal_target(self, target):
        self.internal_targets.append(target)

    def note_indirect_target(self, target):
        self.indirect_targets.append(target)
        if target.has_key('name'):
            self.note_refname(target)

    def note_anonymous_target(self, target):
        self.set_id(target)
        self.anonymous_targets.append(target)

    def note_anonymous_ref(self, ref):
        self.anonymous_refs.append(ref)

    def note_autofootnote(self, footnote):
        self.set_id(footnote)
        self.autofootnotes.append(footnote)

    def note_autofootnote_ref(self, ref):
        self.set_id(ref)
        self.autofootnote_refs.append(ref)

    def note_symbol_footnote(self, footnote):
        self.set_id(footnote)
        self.symbol_footnotes.append(footnote)

    def note_symbol_footnote_ref(self, ref):
        self.set_id(ref)
        self.symbol_footnote_refs.append(ref)

    def note_footnote(self, footnote):
        self.set_id(footnote)
        self.footnotes.append(footnote)

    def note_footnote_ref(self, ref):
        self.set_id(ref)
        self.footnote_refs.setdefault(ref['refname'], []).append(ref)
        self.note_refname(ref)

    def note_citation(self, citation):
        self.citations.append(citation)

    def note_citation_ref(self, ref):
        self.set_id(ref)
        self.citation_refs.setdefault(ref['refname'], []).append(ref)
        self.note_refname(ref)

    def note_substitution_def(self, subdef, def_name, msgnode=None):
        name = subdef['name'] = whitespace_normalize_name(def_name)
        if self.substitution_defs.has_key(name):
            msg = self.reporter.error(
                  'Duplicate substitution definition name: "%s".' % name,
                  base_node=subdef)
            if msgnode is not None:
                msgnode += msg
            oldnode = self.substitution_defs[name]
            dupname(oldnode)
        # keep only the last definition:
        self.substitution_defs[name] = subdef
        # case-insensitive mapping:
        self.substitution_names[fully_normalize_name(name)] = name

    def note_substitution_ref(self, subref, refname):
        name = subref['refname'] = whitespace_normalize_name(refname)
        self.substitution_refs.setdefault(name, []).append(subref)

    def note_pending(self, pending, priority=None):
        self.transformer.add_pending(pending, priority)

    def note_parse_message(self, message):
        self.parse_messages.append(message)

    def note_transform_message(self, message):
        self.transform_messages.append(message)

    def note_source(self, source, offset):
        """Record the current input position; `offset` is 0-based or None."""
        self.current_source = source
        if offset is None:
            self.current_line = offset
        else:
            # Stored line numbers are 1-based.
            self.current_line = offset + 1

    def copy(self):
        """Return a childless document with the same settings, reporter
        and attributes."""
        return self.__class__(self.settings, self.reporter,
                              **self.attributes)
935 # ================
936 # Title Elements
937 # ================
# Title-like elements; all of them directly contain text.

class title(Titular, PreBibliographic, TextElement):
    pass


class subtitle(Titular, PreBibliographic, TextElement):
    pass


class rubric(Titular, TextElement):
    pass
944 # ========================
945 # Bibliographic Elements
946 # ========================
# Container elements.

class docinfo(Bibliographic, Element):
    pass


class info(Bibliographic, Element):
    pass


class authors(Bibliographic, Element):
    pass


# Text-bearing bibliographic fields.

class author(Bibliographic, TextElement):
    pass


class organization(Bibliographic, TextElement):
    pass


class contact(Bibliographic, TextElement):
    pass


class version(Bibliographic, TextElement):
    pass


class revision(Bibliographic, TextElement):
    pass


class status(Bibliographic, TextElement):
    pass


class date(Bibliographic, TextElement):
    pass


class copyright(Bibliographic, TextElement):
    pass


# The address keeps its whitespace (preformatted text).

class address(Bibliographic, FixedTextElement):
    pass
962 # =====================
963 # Decorative Elements
964 # =====================
class decoration(Decorative, Element):
    pass


class header(Decorative, Element):
    pass


class footer(Decorative, Element):
    pass
971 # =====================
972 # Structural Elements
973 # =====================
class section(Structural, Element):
    pass


class topic(Structural, Element):

    """
    Topics are terminal, "leaf" mini-sections, like block quotes with titles,
    or textual figures.  A topic is just like a section, except that it has no
    subsections, and it doesn't have to conform to section placement rules.

    Topics are allowed wherever body elements (list, table, etc.) are allowed,
    but only at the top level of a section or document.  Topics cannot nest
    inside topics, sidebars, or body elements; you can't have a topic inside a
    table, list, block quote, etc.
    """


class sidebar(Structural, Element):

    """
    Sidebars are like miniature, parallel documents that occur inside other
    documents, providing related or reference material.  A sidebar is
    typically offset by a border and "floats" to the side of the page; the
    document's main text may flow around it.  Sidebars can also be likened to
    super-footnotes; their content is outside of the flow of the document's
    main text.

    Sidebars are allowed wherever body elements (list, table, etc.) are
    allowed, but only at the top level of a section or document.  Sidebars
    cannot nest inside sidebars, topics, or body elements; you can't have a
    sidebar inside a table, list, block quote, etc.
    """


class transition(Structural, Element):
    pass
1012 # ===============
1013 # Body Elements
1014 # ===============
class paragraph(General, TextElement):
    pass


# Bullet and enumerated lists.

class bullet_list(Sequential, Element):
    pass


class enumerated_list(Sequential, Element):
    pass


class list_item(Part, Element):
    pass


# Definition lists.

class definition_list(Sequential, Element):
    pass


class definition_list_item(Part, Element):
    pass


class term(Part, TextElement):
    pass


class classifier(Part, TextElement):
    pass


class definition(Part, Element):
    pass


# Field lists.

class field_list(Sequential, Element):
    pass


class field(Part, Element):
    pass


class field_name(Part, TextElement):
    pass


class field_body(Part, Element):
    pass
class option(Part, Element):

    # Children are joined without any separator in `astext()`.
    child_text_separator = ''


class option_argument(Part, TextElement):

    def astext(self):
        """Return the argument text prefixed by its 'delimiter' attribute
        (a single space if the attribute is not set)."""
        delimiter = self.get('delimiter', ' ')
        return delimiter + TextElement.astext(self)


class option_group(Part, Element):

    # Alternative options are listed comma-separated in `astext()`.
    child_text_separator = ', '


class option_list(Sequential, Element):
    pass


class option_list_item(Part, Element):

    # Children are separated by two spaces in `astext()`.
    child_text_separator = '  '
class option_string(Part, TextElement):
    pass


class description(Part, Element):
    pass


# Preformatted blocks.

class literal_block(General, FixedTextElement):
    pass


class doctest_block(General, FixedTextElement):
    pass


class line_block(General, FixedTextElement):
    pass


class block_quote(General, Element):
    pass


class attribution(Part, TextElement):
    pass


# Admonitions.

class attention(Admonition, Element):
    pass


class caution(Admonition, Element):
    pass


class danger(Admonition, Element):
    pass


class error(Admonition, Element):
    pass


class important(Admonition, Element):
    pass


class note(Admonition, Element):
    pass


class tip(Admonition, Element):
    pass


class hint(Admonition, Element):
    pass


class warning(Admonition, Element):
    pass


class admonition(Admonition, Element):
    pass


# Special elements.

class comment(Special, Invisible, PreBibliographic, FixedTextElement):
    pass


class substitution_definition(Special, Invisible, TextElement):
    pass


class target(Special, Invisible, Inline, TextElement, Targetable):
    pass


# Footnotes and citations.

class footnote(General, Element, Labeled, BackLinkable):
    pass


class citation(General, Element, Labeled, BackLinkable):
    pass


class label(Part, TextElement):
    pass


# Figures.

class figure(General, Element):
    pass


class caption(Part, TextElement):
    pass


class legend(Part, Element):
    pass


# Tables.

class table(General, Element):
    pass


class tgroup(Part, Element):
    pass


class colspec(Part, Element):
    pass


class thead(Part, Element):
    pass


class tbody(Part, Element):
    pass


class row(Part, Element):
    pass


class entry(Part, Element):
    pass
class system_message(Special, PreBibliographic, Element, BackLinkable):

    """
    System message element.

    An optional `message` string is wrapped in a `paragraph` node which
    becomes the first child, ahead of any explicitly passed `children`.
    """

    def __init__(self, message=None, *children, **attributes):
        """
        Build the node.

        - `message`: optional text; when true it is wrapped in a `paragraph`
          and prepended to `children`.
        - `children`: further child nodes, passed on to `Element.__init__`.
        - `attributes`: node attributes, passed on to `Element.__init__`.
        """
        if message:
            p = paragraph('', message)
            children = (p,) + children
        try:
            Element.__init__(self, '', *children, **attributes)
        except:
            # Catch-all on purpose: the exception is re-raised unchanged, we
            # only record which children triggered it.  The diagnostic goes
            # to stderr (not stdout) so that it cannot end up mixed into
            # output that a caller is writing to standard output.
            sys.stderr.write('system_message: children=%r\n' % (children,))
            raise

    def astext(self):
        """
        Return "source:line: (type/level) text" for this message.

        "source", "type" and "level" are read by subscript, so they are
        expected to be set on the node; "line" is optional and defaults to
        an empty string.
        """
        line = self.get('line', '')
        return u'%s:%s: (%s/%s) %s' % (self['source'], line, self['type'],
                                       self['level'], Element.astext(self))
class pending(Special, Invisible, PreBibliographic, Element):

    """
    The "pending" element is used to encapsulate a pending operation: the
    operation (transform), the point at which to apply it, and any data it
    requires.  Only the pending operation's location within the document is
    stored in the public document tree (by the "pending" object itself); the
    operation and its data are stored in the "pending" object's internal
    instance attributes.

    For example, say you want a table of contents in your reStructuredText
    document.  The easiest way to specify where to put it is from within the
    document, with a directive::

        .. contents::

    But the "contents" directive can't do its work until the entire document
    has been parsed and possibly transformed to some extent.  So the directive
    code leaves a placeholder behind that will trigger the second phase of
    its processing, something like this::

        <pending ...public attributes...> + internal attributes

    Use `document.note_pending()` so that the
    `docutils.transforms.Transformer` stage of processing can run all pending
    transforms.
    """

    def __init__(self, transform, details=None,
                 rawsource='', *children, **attributes):
        """
        Store the pending operation alongside the ordinary element data.

        - `transform`: the transform class to apply later.
        - `details`: optional dictionary of data for the transform; a false
          value (including ``None``) is replaced by a new empty dictionary.
        - `rawsource`, `children`, `attributes`: passed on unchanged to
          `Element.__init__`.
        """
        Element.__init__(self, rawsource, *children, **attributes)

        self.transform = transform
        """The `docutils.transforms.Transform` class implementing the pending
        operation."""

        self.details = details or {}
        """Detail data (dictionary) required by the pending operation."""

    def pformat(self, indent=' ', level=0):
        """
        Return `Element.pformat()` output followed by a listing of the
        internal attributes (`transform` and the sorted `details` items).

        Detail values that are `Node` instances, or non-empty lists whose
        first item is a `Node`, are expanded through their own `pformat()`;
        any other value is shown with ``%r``.
        """
        # NOTE(review): the padding inside the string literals below is
        # reproduced exactly as found in this copy of the file; confirm the
        # intended widths against the canonical source.
        internals = [
              '.. internal attributes:',
              ' .transform: %s.%s' % (self.transform.__module__,
                                      self.transform.__name__),
              ' .details:']
        # Sort the items so the listing does not depend on dictionary order.
        details = self.details.items()
        details.sort()
        for key, value in details:
            if isinstance(value, Node):
                internals.append('%7s%s:' % ('', key))
                internals.extend(['%9s%s' % ('', line)
                                  for line in value.pformat().splitlines()])
            elif value and isinstance(value, ListType) \
                  and isinstance(value[0], Node):
                # Only the first item is type-checked; every item of the
                # list is then formatted through its `pformat()`.
                internals.append('%7s%s:' % ('', key))
                for v in value:
                    internals.extend(['%9s%s' % ('', line)
                                      for line in v.pformat().splitlines()])
            else:
                internals.append('%7s%s: %r' % ('', key, value))
        return (Element.pformat(self, indent, level)
                + ''.join([(' %s%s\n' % (indent * level, line))
                           for line in internals]))

    def copy(self):
        """
        Return a new node of the same class with the same transform, details,
        raw source and attributes; children are not passed along.

        The `details` dictionary is handed over as-is (not copied).
        """
        # Fixed: this used to read the misspelled name `attribuates`, which
        # is not set anywhere in this class, so copying could not work.
        return self.__class__(self.transform, self.details, self.rawsource,
                              **self.attributes)
class raw(Special, Inline, PreBibliographic, FixedTextElement):

    """Raw data, to be handed to the Writer untouched."""
1186 # =================
1187 # Inline Elements
1188 # =================
# Declaration-only inline element classes: each combines `Inline` (plus
# `General` and/or `Referential` where listed) with `TextElement` and adds
# no behavior of its own.
class emphasis(Inline, TextElement): pass
class strong(Inline, TextElement): pass
class literal(Inline, TextElement): pass
class reference(General, Inline, Referential, TextElement): pass
class footnote_reference(Inline, Referential, TextElement): pass
class citation_reference(Inline, Referential, TextElement): pass
class substitution_reference(Inline, TextElement): pass
class title_reference(Inline, TextElement): pass
class abbreviation(Inline, TextElement): pass
class acronym(Inline, TextElement): pass
class superscript(Inline, TextElement): pass
class subscript(Inline, TextElement): pass
class image(General, Inline, TextElement):

    """Image element; its text form is the alternate text."""

    def astext(self):
        """Return the "alt" attribute, or an empty string if it is unset."""
        alternate_text = self.get('alt', '')
        return alternate_text
# Declaration-only inline element classes (`Inline` + `TextElement`).
class inline(Inline, TextElement): pass
class problematic(Inline, TextElement): pass
class generated(Inline, TextElement): pass
1215 # ========================================
1216 # Auxiliary Classes, Functions, and Data
1217 # ========================================
node_class_names = [
    'Text',
    'abbreviation', 'acronym', 'address', 'admonition', 'attention',
    'attribution', 'author', 'authors',
    'block_quote', 'bullet_list',
    'caption', 'caution', 'citation', 'citation_reference', 'classifier',
    'colspec', 'comment', 'contact', 'copyright',
    'danger', 'date', 'decoration', 'definition', 'definition_list',
    'definition_list_item', 'description', 'docinfo', 'doctest_block',
    'document',
    'emphasis', 'entry', 'enumerated_list', 'error',
    'field', 'field_body', 'field_list', 'field_name', 'figure', 'footer',
    'footnote', 'footnote_reference',
    'generated',
    'header', 'hint',
    'image', 'important', 'info', 'inline',
    'label', 'legend', 'line_block', 'list_item', 'literal', 'literal_block',
    'note',
    'option', 'option_argument', 'option_group', 'option_list',
    'option_list_item', 'option_string', 'organization',
    'paragraph', 'pending', 'problematic',
    'raw', 'reference', 'revision', 'row', 'rubric',
    'section', 'sidebar', 'status', 'strong', 'subscript',
    'substitution_definition', 'substitution_reference', 'subtitle',
    'superscript', 'system_message',
    'table', 'target', 'tbody', 'term', 'tgroup', 'thead', 'tip', 'title',
    'title_reference', 'topic', 'transition',
    'version',
    'warning',
    ]
"""A list of names of all concrete Node subclasses."""
class NodeVisitor:

    """
    Abstract superclass implementing the "Visitor" pattern [GoF95]_ for
    document tree traversals.

    Each node class has corresponding methods, doing nothing by default;
    override individual methods for specific and useful behaviour.
    `Node.walk()` calls the "``visit_`` + node class name" method upon
    entering a node.  `Node.walkabout()` additionally calls the "``depart_``
    + node class name" method before exiting a node.

    Use this base class for visitors whose ``visit_...`` & ``depart_...``
    methods should be implemented for *all* node types encountered (such as
    for `docutils.writers.Writer` subclasses).  Unimplemented methods will
    raise exceptions.

    For sparse traversals, where only certain node types are of interest,
    subclass `SparseNodeVisitor` instead.  When (mostly or entirely) uniform
    processing is desired, subclass `GenericNodeVisitor`.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    def __init__(self, document):
        """Remember the `document` this visitor operates on."""
        self.document = document

    def unknown_visit(self, node):
        """
        Handle entry into a `Node` type that has no ``visit_...`` method.

        Raises `NotImplementedError` unless overridden.
        """
        visitor_class = self.__class__
        node_type = node.__class__.__name__
        raise NotImplementedError('%s visiting unknown node type: %s'
                                  % (visitor_class, node_type))

    def unknown_departure(self, node):
        """
        Handle exit from a `Node` type that has no ``depart_...`` method.

        Raises `NotImplementedError` unless overridden.
        """
        visitor_class = self.__class__
        node_type = node.__class__.__name__
        raise NotImplementedError('%s departing unknown node type: %s'
                                  % (visitor_class, node_type))
class SparseNodeVisitor(NodeVisitor):

    """
    Base class for sparse traversals, where only certain node types are of
    interest.  When ``visit_...`` & ``depart_...`` methods should be
    implemented for *all* node types (such as for `docutils.writers.Writer`
    subclasses), subclass `NodeVisitor` instead.

    The class body is empty: do-nothing handler methods for the names in
    `node_class_names` are attached to it by `_add_node_class_names()`.
    """
class GenericNodeVisitor(NodeVisitor):

    """
    Generic "Visitor" abstract superclass, for simple traversals.

    Unless overridden, each ``visit_...`` method calls `default_visit()`, and
    each ``depart_...`` method (when using `Node.walkabout()`) calls
    `default_departure()`.  `default_visit()` (and `default_departure()`) must
    be overridden in subclasses.

    Define fully generic visitors by overriding `default_visit()` (and
    `default_departure()`) only.  Define semi-generic visitors by overriding
    individual ``visit_...()`` (and ``depart_...()``) methods also.

    `NodeVisitor.unknown_visit()` (`NodeVisitor.unknown_departure()`) should
    be overridden for default behavior.

    The per-node-type ``visit_...``/``depart_...`` methods are not written
    out here; `_add_node_class_names()` attaches them to this class.
    """

    def default_visit(self, node):
        """
        Override for generic, uniform traversals.

        This base implementation only raises `NotImplementedError`.
        """
        raise NotImplementedError

    def default_departure(self, node):
        """
        Override for generic, uniform traversals.

        This base implementation only raises `NotImplementedError`.
        """
        raise NotImplementedError
# Module-level functions written with a `self` parameter:
# `_add_node_class_names()` installs them as methods on the visitor classes.

def _call_default_visit(self, node):
    """Forward to the visitor's `default_visit()`."""
    self.default_visit(node)

def _call_default_departure(self, node):
    """Forward to the visitor's `default_departure()`."""
    self.default_departure(node)

def _nop(self, node):
    """Do nothing."""
    pass
def _add_node_class_names(names):
    """
    Save typing with dynamic assignments.

    For every node class name in `names`, attach ``visit_<name>`` and
    ``depart_<name>`` methods to the visitor base classes:

    - on `GenericNodeVisitor` they forward to `default_visit()` /
      `default_departure()`;
    - on `SparseNodeVisitor` they do nothing.
    """
    for _name in names:
        setattr(GenericNodeVisitor, "visit_" + _name, _call_default_visit)
        setattr(GenericNodeVisitor, "depart_" + _name, _call_default_departure)
        setattr(SparseNodeVisitor, 'visit_' + _name, _nop)
        # Fixed: the prefix was 'depart' (no underscore), which created
        # useless ``depart<name>`` attributes and left `SparseNodeVisitor`
        # without the ``depart_<name>`` methods.
        setattr(SparseNodeVisitor, 'depart_' + _name, _nop)

_add_node_class_names(node_class_names)
class TreeCopyVisitor(GenericNodeVisitor):

    """
    Produce a full copy of a tree or branch, element attributes included.
    """

    def __init__(self, document):
        GenericNodeVisitor.__init__(self, document)
        # Saved "acting parent" objects, innermost last.
        self.parent_stack = []
        # Current acting parent; starts out as a plain list that will
        # receive the copy of the traversal's root node.
        self.parent = []

    def get_tree_copy(self):
        """Return the first item of the current acting parent."""
        return self.parent[0]

    def default_visit(self, node):
        """Copy the current node, and make the copy the new acting parent."""
        duplicate = node.copy()
        enclosing = self.parent
        enclosing.append(duplicate)
        self.parent_stack.append(enclosing)
        self.parent = duplicate

    def default_departure(self, node):
        """Go back to the acting parent saved on entry to this node."""
        self.parent = self.parent_stack.pop()
class TreePruningException(Exception):

    """
    Base class for `NodeVisitor`-related tree pruning exceptions.

    Subclasses are raised from within ``visit_...`` or ``depart_...``
    methods called from `Node.walk()` and `Node.walkabout()` tree traversals,
    in order to prune the tree traversed.
    """
class SkipChildren(TreePruningException):

    """
    Skip all children of the current node.  The current node's siblings and
    its ``depart_...`` method are not affected.
    """
class SkipSiblings(TreePruningException):

    """
    Skip any remaining siblings (to the right) of the current node.  The
    current node's children and its ``depart_...`` method are not affected.
    """
class SkipNode(TreePruningException):

    """
    Skip the current node's children, and do not call the current node's
    ``depart_...`` method.
    """
class SkipDeparture(TreePruningException):

    """
    Skip the call to the current node's ``depart_...`` method.  The current
    node's children and siblings are not affected.
    """
class NodeFound(TreePruningException):

    """
    Signal that the target of a search has been found.  The client must
    catch this exception itself; the traversal code does not catch it.
    """
_non_id_chars = re.compile('[^a-z0-9]+')
_non_id_at_ends = re.compile('^[-0-9]+|-+$')

def make_id(string):
    """
    Convert `string` into an identifier and return it.

    Docutils identifiers will conform to the regular expression
    ``[a-z](-?[a-z0-9]+)*``.  For CSS compatibility, identifiers (the "class"
    and "id" attributes) should have no underscores, colons, or periods.
    Hyphens may be used.

    - The `HTML 4.01 spec`_ defines identifiers based on SGML tokens:

          ID and NAME tokens must begin with a letter ([A-Za-z]) and may be
          followed by any number of letters, digits ([0-9]), hyphens ("-"),
          underscores ("_"), colons (":"), and periods (".").

    - However the `CSS1 spec`_ defines identifiers based on the "name" token,
      a tighter interpretation ("flex" tokenizer notation; "latin1" and
      "escape" 8-bit characters have been replaced with entities)::

          unicode     \\[0-9a-f]{1,4}
          latin1      [&iexcl;-&yuml;]
          escape      {unicode}|\\[ -~&iexcl;-&yuml;]
          nmchar      [-a-z0-9]|{latin1}|{escape}
          name        {nmchar}+

    The CSS1 "nmchar" rule does not include underscores ("_"), colons (":"),
    or periods ("."), therefore "class" and "id" attributes should not contain
    these characters.  They should be replaced with hyphens ("-").  Combined
    with HTML's requirements (the first character must be a letter; no
    "unicode", "latin1", or "escape" characters), this results in the
    ``[a-z](-?[a-z0-9]+)*`` pattern.

    .. _HTML 4.01 spec: http://www.w3.org/TR/html401
    .. _CSS1 spec: http://www.w3.org/TR/REC-CSS1
    """
    # Lowercase, then collapse every whitespace run to a single space.
    collapsed = ' '.join(string.lower().split())
    # Each run of characters outside [a-z0-9] becomes one hyphen.
    hyphenated = _non_id_chars.sub('-', collapsed)
    # Strip leading hyphens/digits and trailing hyphens.
    trimmed = _non_id_at_ends.sub('', hyphenated)
    return str(trimmed)
def dupname(node):
    """Move the value stored under "name" on `node` to "dupname"."""
    duplicated = node['name']
    node['dupname'] = duplicated
    del node['name']
def fully_normalize_name(name):
    """Return `name` lowercased, with whitespace runs collapsed to spaces."""
    words = name.lower().split()
    return ' '.join(words)
def whitespace_normalize_name(name):
    """Return `name` with whitespace runs collapsed to spaces; case kept."""
    words = name.split()
    return ' '.join(words)