docutils/nodes.py

   1 # $Id$
   2 # Author: David Goodger <goodger@python.org>
   3 # Copyright: This module has been placed in the public domain.
   4
   5 """
   6 Docutils document tree element class library.
   7
   8 Classes in CamelCase are abstract base classes or auxiliary classes. The one
   9 exception is `Text`, for a text (PCDATA) node; uppercase is used to
  10 differentiate from element classes.  Classes in lower_case_with_underscores
  11 are element classes, matching the XML element generic identifiers in the DTD_.
  12
  13 The position of each node (the level at which it can occur) is significant and
  14 is represented by abstract base classes (`Root`, `Structural`, `Body`,
  15 `Inline`, etc.).  Certain transformations will be easier because we can use
  16 ``isinstance(node, base_class)`` to determine the position of the node in the
  17 hierarchy.
  18
  19 .. _DTD: http://docutils.sourceforge.net/docs/ref/docutils.dtd
  20 """
  21
  22 __docformat__ = 'reStructuredText'
  23
  24 import sys
  25 import os
  26 import re
  27 import warnings
  28 from types import ClassType, SliceType
  29 import unicodedata
  30
  31 # ==============================
  32 #  Functional Node Base Classes
  33 # ==============================
  34
  35 class Node:
  36
  37     """Abstract base class of nodes in a document tree."""
  38
  39     parent = None
  40     """Back-reference to the Node immediately containing this Node."""
  41
  42     document = None
  43     """The `document` node at the root of the tree containing this Node."""
  44
  45     source = None
  46     """Path or description of the input source which generated this Node."""
  47
  48     line = None
  49     """The line number (1-based) of the beginning of this Node in `source`."""
  50
  51     def __nonzero__(self):
  52         """
  53         Node instances are always true, even if they're empty.  A node is more
  54         than a simple container.  Its boolean "truth" does not depend on
  55         having one or more subnodes in the doctree.
  56
  57         Use `len()` to check node length.  Use `None` to represent a boolean
  58         false value.
  59         """
  60         return True
  61
  62     def __str__(self):
  63         return unicode(self).encode('raw_unicode_escape')
  64
  65     def asdom(self, dom=None):
  66         """Return a DOM **fragment** representation of this Node."""
  67         if dom is None:
  68             import xml.dom.minidom as dom
  69         domroot = dom.Document()
  70         return self._dom_node(domroot)
  71
  72     def pformat(self, indent='    ', level=0):
  73         """
  74         Return an indented pseudo-XML representation, for test purposes.
  75
  76         Override in subclasses.
  77         """
  78         raise NotImplementedError
  79
  80     def copy(self):
  81         """Return a copy of self."""
  82         raise NotImplementedError
  83
  84     def deepcopy(self):
  85         """Return a deep copy of self (also copying children)."""
  86         raise NotImplementedError
  87
  88     def setup_child(self, child):
  89         child.parent = self
  90         if self.document:
  91             child.document = self.document
  92             if child.source is None:
  93                 child.source = self.document.current_source
  94             if child.line is None:
  95                 child.line = self.document.current_line
  96
  97     def walk(self, visitor):
  98         """
  99         Traverse a tree of `Node` objects, calling the
 100         `dispatch_visit()` method of `visitor` when entering each
 101         node.  (The `walkabout()` method is similar, except it also
 102         calls the `dispatch_departure()` method before exiting each
 103         node.)
 104
 105         This tree traversal supports limited in-place tree
 106         modifications.  Replacing one node with one or more nodes is
 107         OK, as is removing an element.  However, if the node removed
 108         or replaced occurs after the current node, the old node will
 109         still be traversed, and any new nodes will not.
 110
 111         Within ``visit`` methods (and ``depart`` methods for
 112         `walkabout()`), `TreePruningException` subclasses may be raised
 113         (`SkipChildren`, `SkipSiblings`, `SkipNode`, `SkipDeparture`).
 114
 115         Parameter `visitor`: A `NodeVisitor` object, containing a
 116         ``visit`` implementation for each `Node` subclass encountered.
 117
 118         Return true if we should stop the traversal.
 119         """
 120         stop = 0
 121         visitor.document.reporter.debug(
 122             'docutils.nodes.Node.walk calling dispatch_visit for %s'
 123             % self.__class__.__name__)
 124         try:
 125             try:
 126                 visitor.dispatch_visit(self)
 127             except (SkipChildren, SkipNode):
 128                 return stop
 129             except SkipDeparture:           # not applicable; ignore
 130                 pass
 131             children = self.children
 132             try:
 133                 for child in children[:]:
 134                     if child.walk(visitor):
 135                         stop = 1
 136                         break
 137             except SkipSiblings:
 138                 pass
 139         except StopTraversal:
 140             stop = 1
 141         return stop
 142
 143     def walkabout(self, visitor):
 144         """
 145         Perform a tree traversal similarly to `Node.walk()` (which
 146         see), except also call the `dispatch_departure()` method
 147         before exiting each node.
 148
 149         Parameter `visitor`: A `NodeVisitor` object, containing a
 150         ``visit`` and ``depart`` implementation for each `Node`
 151         subclass encountered.
 152
 153         Return true if we should stop the traversal.
 154         """
 155         call_depart = 1
 156         stop = 0
 157         visitor.document.reporter.debug(
 158             'docutils.nodes.Node.walkabout calling dispatch_visit for %s'
 159             % self.__class__.__name__)
 160         try:
 161             try:
 162                 visitor.dispatch_visit(self)
 163             except SkipNode:
 164                 return stop
 165             except SkipDeparture:
 166                 call_depart = 0
 167             children = self.children
 168             try:
 169                 for child in children[:]:
 170                     if child.walkabout(visitor):
 171                         stop = 1
 172                         break
 173             except SkipSiblings:
 174                 pass
 175         except SkipChildren:
 176             pass
 177         except StopTraversal:
 178             stop = 1
 179         if call_depart:
 180             visitor.document.reporter.debug(
 181                 'docutils.nodes.Node.walkabout calling dispatch_departure '
 182                 'for %s' % self.__class__.__name__)
 183             visitor.dispatch_departure(self)
 184         return stop
 185
 186     def traverse(self, condition=None,
 187                  include_self=1, descend=1, siblings=0, ascend=0):
 188         """
 189         Return an iterable containing
 190
 191         * self (if include_self is true)
 192         * all descendants in tree traversal order (if descend is true)
 193         * all siblings (if siblings is true) and their descendants (if
 194           also descend is true)
 195         * the siblings of the parent (if ascend is true) and their
 196           descendants (if also descend is true), and so on
 197
 198         If `condition` is not None, the iterable contains only nodes
 199         for which ``condition(node)`` is true.  If `condition` is a
 200         node class ``cls``, it is equivalent to a function consisting
 201         of ``return isinstance(node, cls)``.
 202
 203         If ascend is true, assume siblings to be true as well.
 204
 205         For example, given the following tree::
 206
 207             <paragraph>
 208                 <emphasis>      <--- emphasis.traverse() and
 209                     <strong>    <--- strong.traverse() are called.
 210                         Foo
 211                     Bar
 212                 <reference name="Baz" refid="baz">
 213                     Baz
 214
 215         Then list(emphasis.traverse()) equals ::
 216
 217             [<emphasis>, <strong>, <#text: Foo>, <#text: Bar>]
 218
 219         and list(strong.traverse(ascend=1)) equals ::
 220
 221             [<strong>, <#text: Foo>, <#text: Bar>, <reference>, <#text: Baz>]
 222         """
 223         r = []
 224         if ascend:
 225             siblings=1
 226         # Check if `condition` is a class (check for TypeType for Python
 227         # implementations that use only new-style classes, like PyPy).
 228         if isinstance(condition, (ClassType, type)):
 229             node_class = condition
 230             def condition(node, node_class=node_class):
 231                 return isinstance(node, node_class)
 232         if include_self and (condition is None or condition(self)):
 233             r.append(self)
 234         if descend and len(self.children):
 235             for child in self:
 236                 r.extend(child.traverse(
 237                     include_self=1, descend=1, siblings=0, ascend=0,
 238                     condition=condition))
 239         if siblings or ascend:
 240             node = self
 241             while node.parent:
 242                 index = node.parent.index(node)
 243                 for sibling in node.parent[index+1:]:
 244                     r.extend(sibling.traverse(include_self=1, descend=descend,
 245                                               siblings=0, ascend=0,
 246                                               condition=condition))
 247                 if not ascend:
 248                     break
 249                 else:
 250                     node = node.parent
 251         return r
 252
 253     def next_node(self, condition=None,
 254                   include_self=0, descend=1, siblings=0, ascend=0):
 255         """
 256         Return the first node in the iterable returned by traverse(),
 257         or None if the iterable is empty.
 258
 259         Parameter list is the same as of traverse.  Note that
 260         include_self defaults to 0, though.
 261         """
 262         iterable = self.traverse(condition=condition,
 263                                  include_self=include_self, descend=descend,
 264                                  siblings=siblings, ascend=ascend)
 265         try:
 266             return iterable[0]
 267         except IndexError:
 268             return None
 269
 270 class reprunicode(unicode):
 271     """
 272     A class that removes the initial u from unicode's repr.
 273     """
 274
 275     def __repr__(self):
 276         return unicode.__repr__(self)[1:]
 277
 278 class Text(Node, reprunicode):
 279
 280     """
 281     Instances are terminal nodes (leaves) containing text only; no child
 282     nodes or attributes.  Initialize by passing a string to the constructor.
 283     Access the text itself with the `astext` method.
 284     """
 285
 286     tagname = '#text'
 287
 288     children = ()
 289     """Text nodes have no children, and cannot have children."""
 290
 291     def __new__(cls, data, rawsource=None):
 292         """Prevent the rawsource argument from propagating to str."""
 293         return reprunicode.__new__(cls, data)
 294
 295     def __init__(self, data, rawsource=''):
 296
 297         self.rawsource = rawsource
 298         """The raw text from which this element was constructed."""
 299
 300     def __repr__(self):
 301         data = reprunicode.__repr__(self)
 302         if len(data) > 70:
 303             data = reprunicode.__repr__(self[:64] + ' ...')
 304         return '<%s: %s>' % (self.tagname, data)
 305
 306     def shortrepr(self):
 307         data = reprunicode.__repr__(self)
 308         if len(data) > 20:
 309             data = reprunicode.__repr__(self[:16] + ' ...')
 310         return '<%s: %s>' % (self.tagname, data)
 311
 312     def _dom_node(self, domroot):
 313         return domroot.createTextNode(unicode(self))
 314
 315     def astext(self):
 316         return reprunicode(self)
 317
 318     # Note about __unicode__: The implementation of __unicode__ here,
 319     # and the one raising NotImplemented in the superclass Node had
 320     # to be removed when changing Text to a subclass of unicode instead
 321     # of UserString, since there is no way to delegate the __unicode__
 322     # call to the superclass unicode:
 323     # unicode itself does not have __unicode__ method to delegate to
 324     # and calling unicode(self) or unicode.__new__ directly creates
 325     # an infinite loop
 326
 327     def copy(self):
 328         return self.__class__(reprunicode(self), rawsource=self.rawsource)
 329
 330     def deepcopy(self):
 331         return self.copy()
 332
 333     def pformat(self, indent='    ', level=0):
 334         result = []
 335         indent = indent * level
 336         for line in self.splitlines():
 337             result.append(indent + line + '\n')
 338         return ''.join(result)
 339
 340     # rstrip and lstrip are used by substitution definitions where
 341     # they are expected to return a Text instance, this was formerly
 342     # taken care of by UserString. Note that then and now the
 343     # rawsource member is lost.
 344
 345     def rstrip(self, chars=None):
 346         return self.__class__(reprunicode.rstrip(self, chars))
 347     def lstrip(self, chars=None):
 348         return self.__class__(reprunicode.lstrip(self, chars))
 349
 350 class Element(Node):
 351
 352     """
 353     `Element` is the superclass to all specific elements.
 354
 355     Elements contain attributes and child nodes.  Elements emulate
 356     dictionaries for attributes, indexing by attribute name (a string).  To
 357     set the attribute 'att' to 'value', do::
 358
 359         element['att'] = 'value'
 360
 361     There are two special attributes: 'ids' and 'names'.  Both are
 362     lists of unique identifiers, and names serve as human interfaces
 363     to IDs.  Names are case- and whitespace-normalized (see the
 364     fully_normalize_name() function), and IDs conform to the regular
 365     expression ``[a-z](-?[a-z0-9]+)*`` (see the make_id() function).
 366
 367     Elements also emulate lists for child nodes (element nodes and/or text
 368     nodes), indexing by integer.  To get the first child node, use::
 369
 370         element[0]
 371
 372     Elements may be constructed using the ``+=`` operator.  To add one new
 373     child node to element, do::
 374
 375         element += node
 376
 377     This is equivalent to ``element.append(node)``.
 378
 379     To add a list of multiple child nodes at once, use the same ``+=``
 380     operator::
 381
 382         element += [node1, node2]
 383
 384     This is equivalent to ``element.extend([node1, node2])``.
 385     """
 386
 387     list_attributes = ('ids', 'classes', 'names', 'dupnames', 'backrefs')
 388     """List attributes, automatically initialized to empty lists for
 389     all nodes."""
 390
 391     tagname = None
 392     """The element generic identifier. If None, it is set as an instance
 393     attribute to the name of the class."""
 394
 395     child_text_separator = '\n\n'
 396     """Separator for child nodes, used by `astext()` method."""
 397
 398     def __init__(self, rawsource='', *children, **attributes):
 399         self.rawsource = rawsource
 400         """The raw text from which this element was constructed."""
 401
 402         self.children = []
 403         """List of child nodes (elements and/or `Text`)."""
 404
 405         self.extend(children)           # maintain parent info
 406
 407         self.attributes = {}
 408         """Dictionary of attribute {name: value}."""
 409
 410         # Initialize list attributes.
 411         for att in self.list_attributes:
 412             self.attributes[att] = []
 413
 414         for att, value in attributes.items():
 415             att = att.lower()
 416             if att in self.list_attributes:
 417                 # mutable list; make a copy for this node
 418                 self.attributes[att] = value[:]
 419             else:
 420                 self.attributes[att] = value
 421
 422         if self.tagname is None:
 423             self.tagname = self.__class__.__name__
 424
 425     def _dom_node(self, domroot):
 426         element = domroot.createElement(self.tagname)
 427         for attribute, value in self.attlist():
 428             if isinstance(value, list):
 429                 value = ' '.join([serial_escape('%s' % v) for v in value])
 430             element.setAttribute(attribute, '%s' % value)
 431         for child in self.children:
 432             element.appendChild(child._dom_node(domroot))
 433         return element
 434
 435     def __repr__(self):
 436         data = ''
 437         for c in self.children:
 438             data += c.shortrepr()
 439             if len(data) > 60:
 440                 data = data[:56] + ' ...'
 441                 break
 442         if self['names']:
 443             return '<%s "%s": %s>' % (self.__class__.__name__,
 444                                       '; '.join(self['names']), data)
 445         else:
 446             return '<%s: %s>' % (self.__class__.__name__, data)
 447
 448     def shortrepr(self):
 449         if self['names']:
 450             return '<%s "%s"...>' % (self.__class__.__name__,
 451                                      '; '.join(self['names']))
 452         else:
 453             return '<%s...>' % self.tagname
 454
 455     def __unicode__(self):
 456         if self.children:
 457             return u'%s%s%s' % (self.starttag(),
 458                                 ''.join([unicode(c) for c in self.children]),
 459                                 self.endtag())
 460         else:
 461             return self.emptytag()
 462
 463     def starttag(self):
 464         parts = [self.tagname]
 465         for name, value in self.attlist():
 466             if value is None:           # boolean attribute
 467                 parts.append(name)
 468             elif isinstance(value, list):
 469                 values = [serial_escape('%s' % v) for v in value]
 470                 parts.append('%s="%s"' % (name, ' '.join(values)))
 471             else:
 472                 parts.append('%s="%s"' % (name, value))
 473         return '<%s>' % ' '.join(parts)
 474
 475     def endtag(self):
 476         return '</%s>' % self.tagname
 477
 478     def emptytag(self):
 479         return u'<%s/>' % ' '.join([self.tagname] +
 480                                     ['%s="%s"' % (n, v)
 481                                      for n, v in self.attlist()])
 482
 483     def __len__(self):
 484         return len(self.children)
 485
 486     def __getitem__(self, key):
 487         if isinstance(key, unicode) or isinstance(key, str):
 488             return self.attributes[key]
 489         elif isinstance(key, int):
 490             return self.children[key]
 491         elif isinstance(key, SliceType):
 492             assert key.step in (None, 1), 'cannot handle slice with stride'
 493             return self.children[key.start:key.stop]
 494         else:
 495             raise TypeError, ('element index must be an integer, a slice, or '
 496                               'an attribute name string')
 497
 498     def __setitem__(self, key, item):
 499         if isinstance(key, unicode) or isinstance(key, str):
 500             self.attributes[str(key)] = item
 501         elif isinstance(key, int):
 502             self.setup_child(item)
 503             self.children[key] = item
 504         elif isinstance(key, SliceType):
 505             assert key.step in (None, 1), 'cannot handle slice with stride'
 506             for node in item:
 507                 self.setup_child(node)
 508             self.children[key.start:key.stop] = item
 509         else:
 510             raise TypeError, ('element index must be an integer, a slice, or '
 511                               'an attribute name string')
 512
 513     def __delitem__(self, key):
 514         if isinstance(key, unicode) or isinstance(key, str):
 515             del self.attributes[key]
 516         elif isinstance(key, int):
 517             del self.children[key]
 518         elif isinstance(key, SliceType):
 519             assert key.step in (None, 1), 'cannot handle slice with stride'
 520             del self.children[key.start:key.stop]
 521         else:
 522             raise TypeError, ('element index must be an integer, a simple '
 523                               'slice, or an attribute name string')
 524
 525     def __add__(self, other):
 526         return self.children + other
 527
 528     def __radd__(self, other):
 529         return other + self.children
 530
 531     def __iadd__(self, other):
 532         """Append a node or a list of nodes to `self.children`."""
 533         if isinstance(other, Node):
 534             self.append(other)
 535         elif other is not None:
 536             self.extend(other)
 537         return self
 538
 539     def astext(self):
 540         return self.child_text_separator.join(
 541               [child.astext() for child in self.children])
 542
 543     def non_default_attributes(self):
 544         atts = {}
 545         for key, value in self.attributes.items():
 546             if self.is_not_default(key):
 547                 atts[key] = value
 548         return atts
 549
 550     def attlist(self):
 551         attlist = self.non_default_attributes().items()
 552         attlist.sort()
 553         return attlist
 554
 555     def get(self, key, failobj=None):
 556         return self.attributes.get(key, failobj)
 557
 558     def hasattr(self, attr):
 559         return attr in self.attributes
 560
 561     def delattr(self, attr):
 562         if attr in self.attributes:
 563             del self.attributes[attr]
 564
 565     def setdefault(self, key, failobj=None):
 566         return self.attributes.setdefault(key, failobj)
 567
 568     has_key = hasattr
 569
 570     # support operator in
 571     __contains__ = hasattr
 572
 573     def append(self, item):
 574         self.setup_child(item)
 575         self.children.append(item)
 576
 577     def extend(self, item):
 578         for node in item:
 579             self.append(node)
 580
 581     def insert(self, index, item):
 582         if isinstance(item, Node):
 583             self.setup_child(item)
 584             self.children.insert(index, item)
 585         elif item is not None:
 586             self[index:index] = item
 587
 588     def pop(self, i=-1):
 589         return self.children.pop(i)
 590
 591     def remove(self, item):
 592         self.children.remove(item)
 593
 594     def index(self, item):
 595         return self.children.index(item)
 596
 597     def is_not_default(self, key):
 598         if self[key] == [] and key in self.list_attributes:
 599             return 0
 600         else:
 601             return 1
 602
 603     def update_basic_atts(self, dict):
 604         """
 605         Update basic attributes ('ids', 'names', 'classes',
 606         'dupnames', but not 'source') from node or dictionary `dict`.
 607         """
 608         if isinstance(dict, Node):
 609             dict = dict.attributes
 610         for att in ('ids', 'classes', 'names', 'dupnames'):
 611             for value in dict.get(att, []):
 612                 if not value in self[att]:
 613                     self[att].append(value)
 614
 615     def clear(self):
 616         self.children = []
 617
 618     def replace(self, old, new):
 619         """Replace one child `Node` with another child or children."""
 620         index = self.index(old)
 621         if isinstance(new, Node):
 622             self.setup_child(new)
 623             self[index] = new
 624         elif new is not None:
 625             self[index:index+1] = new
 626
 627     def replace_self(self, new):
 628         """
 629         Replace `self` node with `new`, where `new` is a node or a
 630         list of nodes.
 631         """
 632         update = new
 633         if not isinstance(new, Node):
 634             # `new` is a list; update first child.
 635             try:
 636                 update = new[0]
 637             except IndexError:
 638                 update = None
 639         if isinstance(update, Element):
 640             update.update_basic_atts(self)
 641         else:
 642             # `update` is a Text node or `new` is an empty list.
 643             # Assert that we aren't losing any attributes.
 644             for att in ('ids', 'names', 'classes', 'dupnames'):
 645                 assert not self[att], \
 646                        'Losing "%s" attribute: %s' % (att, self[att])
 647         self.parent.replace(self, new)
 648
 649     def first_child_matching_class(self, childclass, start=0, end=sys.maxint):
 650         """
 651         Return the index of the first child whose class exactly matches.
 652
 653         Parameters:
 654
 655         - `childclass`: A `Node` subclass to search for, or a tuple of `Node`
 656           classes. If a tuple, any of the classes may match.
 657         - `start`: Initial index to check.
 658         - `end`: Initial index to *not* check.
 659         """
 660         if not isinstance(childclass, tuple):
 661             childclass = (childclass,)
 662         for index in range(start, min(len(self), end)):
 663             for c in childclass:
 664                 if isinstance(self[index], c):
 665                     return index
 666         return None
 667
 668     def first_child_not_matching_class(self, childclass, start=0,
 669                                        end=sys.maxint):
 670         """
 671         Return the index of the first child whose class does *not* match.
 672
 673         Parameters:
 674
 675         - `childclass`: A `Node` subclass to skip, or a tuple of `Node`
 676           classes. If a tuple, none of the classes may match.
 677         - `start`: Initial index to check.
 678         - `end`: Initial index to *not* check.
 679         """
 680         if not isinstance(childclass, tuple):
 681             childclass = (childclass,)
 682         for index in range(start, min(len(self), end)):
 683             for c in childclass:
 684                 if isinstance(self.children[index], c):
 685                     break
 686             else:
 687                 return index
 688         return None
 689
 690     def pformat(self, indent='    ', level=0):
 691         return ''.join(['%s%s\n' % (indent * level, self.starttag())] +
 692                        [child.pformat(indent, level+1)
 693                         for child in self.children])
 694
 695     def copy(self):
 696         return self.__class__(**self.attributes)
 697
 698     def deepcopy(self):
 699         copy = self.copy()
 700         copy.extend([child.deepcopy() for child in self.children])
 701         return copy
 702
 703     def set_class(self, name):
 704         """Add a new class to the "classes" attribute."""
 705         warnings.warn('docutils.nodes.Element.set_class deprecated; '
 706                       "append to Element['classes'] list attribute directly",
 707                       DeprecationWarning, stacklevel=2)
 708         assert ' ' not in name
 709         self['classes'].append(name.lower())
 710
 711     def note_referenced_by(self, name=None, id=None):
 712         """Note that this Element has been referenced by its name
 713         `name` or id `id`."""
 714         self.referenced = 1
 715         # Element.expect_referenced_by_* dictionaries map names or ids
 716         # to nodes whose ``referenced`` attribute is set to true as
 717         # soon as this node is referenced by the given name or id.
 718         # Needed for target propagation.
 719         by_name = getattr(self, 'expect_referenced_by_name', {}).get(name)
 720         by_id = getattr(self, 'expect_referenced_by_id', {}).get(id)
 721         if by_name:
 722             assert name is not None
 723             by_name.referenced = 1
 724         if by_id:
 725             assert id is not None
 726             by_id.referenced = 1
 727
 728
 729 class TextElement(Element):
 730
 731     """
 732     An element which directly contains text.
 733
 734     Its children are all `Text` or `Inline` subclass nodes.  You can
 735     check whether an element's context is inline simply by checking whether
 736     its immediate parent is a `TextElement` instance (including subclasses).
 737     This is handy for nodes like `image` that can appear both inline and as
 738     standalone body elements.
 739
 740     If passing children to `__init__()`, make sure to set `text` to
 741     ``''`` or some other suitable value.
 742     """
 743
 744     child_text_separator = ''
 745     """Separator for child nodes, used by `astext()` method."""
 746
 747     def __init__(self, rawsource='', text='', *children, **attributes):
 748         if text != '':
 749             textnode = Text(text)
 750             Element.__init__(self, rawsource, textnode, *children,
 751                               **attributes)
 752         else:
 753             Element.__init__(self, rawsource, *children, **attributes)
 754
 755
 756 class FixedTextElement(TextElement):
 757
 758     """An element which directly contains preformatted text."""
 759
 760     def __init__(self, rawsource='', text='', *children, **attributes):
 761         TextElement.__init__(self, rawsource, text, *children, **attributes)
 762         self.attributes['xml:space'] = 'preserve'
 763
 764
 765 # ========
 766 #  Mixins
 767 # ========
 768
 769 class Resolvable:
 770
 771     resolved = 0
 772
 773
 774 class BackLinkable:
 775
 776     def add_backref(self, refid):
 777         self['backrefs'].append(refid)
 778
 779
 780 # ====================
 781 #  Element Categories
 782 # ====================
 783
 784 class Root: pass
 785
 786 class Titular: pass
 787
 788 class PreBibliographic:
 789     """Category of Node which may occur before Bibliographic Nodes."""
 790
 791 class Bibliographic: pass
 792
 793 class Decorative(PreBibliographic): pass
 794
 795 class Structural: pass
 796
 797 class Body: pass
 798
 799 class General(Body): pass
 800
 801 class Sequential(Body):
 802     """List-like elements."""
 803
 804 class Admonition(Body): pass
 805
 806 class Special(Body):
 807     """Special internal body elements."""
 808
 809 class Invisible(PreBibliographic):
 810     """Internal elements that don't appear in output."""
 811
 812 class Part: pass
 813
 814 class Inline: pass
 815
 816 class Referential(Resolvable): pass
 817
 818
 819 class Targetable(Resolvable):
 820
 821     referenced = 0
 822
 823     indirect_reference_name = None
 824     """Holds the whitespace_normalized_name (contains mixed case) of a target.
 825     Required for MoinMoin/reST compatibility."""
 826
 827
 828 class Labeled:
 829     """Contains a `label` as its first element."""
 830
 831
 832 # ==============
 833 #  Root Element
 834 # ==============
 835
 836 class document(Root, Structural, Element):
 837
 838     """
 839     The document root element.
 840
 841     Do not instantiate this class directly; use
 842     `docutils.utils.new_document()` instead.
 843     """
 844
    def __init__(self, settings, reporter, *args, **kwargs):
        """
        Initialize the element part via `Element.__init__()` and create the
        document-wide bookkeeping attributes.

        `settings` and `reporter` are stored as `self.settings` and
        `self.reporter`; all remaining arguments are passed on to
        `Element.__init__()`.
        """
        Element.__init__(self, *args, **kwargs)

        self.current_source = None
        """Path to or description of the input source being processed."""

        self.current_line = None
        """Line number (1-based) of `current_source`."""

        self.settings = settings
        """Runtime settings data record."""

        self.reporter = reporter
        """System message generator."""

        self.indirect_targets = []
        """List of indirect target nodes."""

        self.substitution_defs = {}
        """Mapping of substitution names to substitution_definition nodes."""

        self.substitution_names = {}
        """Mapping of case-normalized substitution names to case-sensitive
        names."""

        self.refnames = {}
        """Mapping of names to lists of referencing nodes."""

        self.refids = {}
        """Mapping of ids to lists of referencing nodes."""

        self.nameids = {}
        """Mapping of names to unique id's."""

        self.nametypes = {}
        """Mapping of names to hyperlink type (boolean: True => explicit,
        False => implicit."""

        self.ids = {}
        """Mapping of ids to nodes."""

        self.footnote_refs = {}
        """Mapping of footnote labels to lists of footnote_reference nodes."""

        self.citation_refs = {}
        """Mapping of citation labels to lists of citation_reference nodes."""

        self.autofootnotes = []
        """List of auto-numbered footnote nodes."""

        self.autofootnote_refs = []
        """List of auto-numbered footnote_reference nodes."""

        self.symbol_footnotes = []
        """List of symbol footnote nodes."""

        self.symbol_footnote_refs = []
        """List of symbol footnote_reference nodes."""

        self.footnotes = []
        """List of manually-numbered footnote nodes."""

        self.citations = []
        """List of citation nodes."""

        self.autofootnote_start = 1
        """Initial auto-numbered footnote number."""

        self.symbol_footnote_start = 0
        """Initial symbol footnote symbol index."""

        self.id_start = 1
        """Initial ID number."""

        self.parse_messages = []
        """System messages generated while parsing."""

        self.transform_messages = []
        """System messages generated while applying transforms."""

        # Function-level import.  NOTE(review): presumably this avoids a
        # circular import with `docutils.transforms` -- confirm.
        import docutils.transforms
        self.transformer = docutils.transforms.Transformer(self)
        """Storage for transforms to be applied to this document."""

        self.decoration = None
        """Document's `decoration` node."""

        # The root node is its own `document`.
        self.document = self
 933
 934     def __getstate__(self):
 935         """
 936         Return dict with unpicklable references removed.
 937         """
 938         state = self.__dict__.copy()
 939         state['reporter'] = None
 940         state['transformer'] = None
 941         return state
 942
 943     def asdom(self, dom=None):
 944         """Return a DOM representation of this document."""
 945         if dom is None:
 946             import xml.dom.minidom as dom
 947         domroot = dom.Document()
 948         domroot.appendChild(self._dom_node(domroot))
 949         return domroot
 950
 951     def set_id(self, node, msgnode=None):
 952         for id in node['ids']:
 953             if id in self.ids and self.ids[id] is not node:
 954                 msg = self.reporter.severe('Duplicate ID: "%s".' % id)
 955                 if msgnode != None:
 956                     msgnode += msg
 957         if not node['ids']:
 958             for name in node['names']:
 959                 id = self.settings.id_prefix + make_id(name)
 960                 if id and id not in self.ids:
 961                     break
 962             else:
 963                 id = ''
 964                 while not id or id in self.ids:
 965                     id = (self.settings.id_prefix +
 966                           self.settings.auto_id_prefix + str(self.id_start))
 967                     self.id_start += 1
 968             node['ids'].append(id)
 969         self.ids[id] = node
 970         return id
 971
 972     def set_name_id_map(self, node, id, msgnode=None, explicit=None):
 973         """
 974         `self.nameids` maps names to IDs, while `self.nametypes` maps names to
 975         booleans representing hyperlink type (True==explicit,
 976         False==implicit).  This method updates the mappings.
 977
 978         The following state transition table shows how `self.nameids` ("ids")
 979         and `self.nametypes` ("types") change with new input (a call to this
 980         method), and what actions are performed ("implicit"-type system
 981         messages are INFO/1, and "explicit"-type system messages are ERROR/3):
 982
 983         ====  =====  ========  ========  =======  ====  =====  =====
 984          Old State    Input          Action        New State   Notes
 985         -----------  --------  -----------------  -----------  -----
 986         ids   types  new type  sys.msg.  dupname  ids   types
 987         ====  =====  ========  ========  =======  ====  =====  =====
 988         -     -      explicit  -         -        new   True
 989         -     -      implicit  -         -        new   False
 990         None  False  explicit  -         -        new   True
 991         old   False  explicit  implicit  old      new   True
 992         None  True   explicit  explicit  new      None  True
 993         old   True   explicit  explicit  new,old  None  True   [#]_
 994         None  False  implicit  implicit  new      None  False
 995         old   False  implicit  implicit  new,old  None  False
 996         None  True   implicit  implicit  new      None  True
 997         old   True   implicit  implicit  new      old   True
 998         ====  =====  ========  ========  =======  ====  =====  =====
 999
1000         .. [#] Do not clear the name-to-id map or invalidate the old target if
1001            both old and new targets are external and refer to identical URIs.
1002            The new target is invalidated regardless.
1003         """
1004         for name in node['names']:
1005             if name in self.nameids:
1006                 self.set_duplicate_name_id(node, id, name, msgnode, explicit)
1007             else:
1008                 self.nameids[name] = id
1009                 self.nametypes[name] = explicit
1010
1011     def set_duplicate_name_id(self, node, id, name, msgnode, explicit):
1012         old_id = self.nameids[name]
1013         old_explicit = self.nametypes[name]
1014         self.nametypes[name] = old_explicit or explicit
1015         if explicit:
1016             if old_explicit:
1017                 level = 2
1018                 if old_id is not None:
1019                     old_node = self.ids[old_id]
1020                     if 'refuri' in node:
1021                         refuri = node['refuri']
1022                         if old_node['names'] \
1023                                and 'refuri' in old_node \
1024                                and old_node['refuri'] == refuri:
1025                             level = 1   # just inform if refuri's identical
1026                     if level > 1:
1027                         dupname(old_node, name)
1028                         self.nameids[name] = None
1029                 msg = self.reporter.system_message(
1030                     level, 'Duplicate explicit target name: "%s".' % name,
1031                     backrefs=[id], base_node=node)
1032                 if msgnode != None:
1033                     msgnode += msg
1034                 dupname(node, name)
1035             else:
1036                 self.nameids[name] = id
1037                 if old_id is not None:
1038                     old_node = self.ids[old_id]
1039                     dupname(old_node, name)
1040         else:
1041             if old_id is not None and not old_explicit:
1042                 self.nameids[name] = None
1043                 old_node = self.ids[old_id]
1044                 dupname(old_node, name)
1045             dupname(node, name)
1046         if not explicit or (not old_explicit and old_id is not None):
1047             msg = self.reporter.info(
1048                 'Duplicate implicit target name: "%s".' % name,
1049                 backrefs=[id], base_node=node)
1050             if msgnode != None:
1051                 msgnode += msg
1052
    def has_name(self, name):
        """Return true if `name` is a key of `self.nameids`."""
        return name in self.nameids

    # "note" here is an imperative verb: "take note of".
    def note_implicit_target(self, target, msgnode=None):
        """
        Assign an ID to `target` with `set_id()` and record its names with
        `set_name_id_map()`, passing ``explicit=None``.
        """
        id = self.set_id(target, msgnode)
        self.set_name_id_map(target, id, msgnode, explicit=None)

    def note_explicit_target(self, target, msgnode=None):
        """
        Assign an ID to `target` with `set_id()` and record its names with
        `set_name_id_map()`, passing ``explicit=1``.
        """
        id = self.set_id(target, msgnode)
        self.set_name_id_map(target, id, msgnode, explicit=1)
1064
    def note_refname(self, node):
        """
        Append `node` to the list stored under ``node['refname']`` in
        `self.refnames`, creating the list if necessary.
        """
        self.refnames.setdefault(node['refname'], []).append(node)

    def note_refid(self, node):
        """
        Append `node` to the list stored under ``node['refid']`` in
        `self.refids`, creating the list if necessary.
        """
        self.refids.setdefault(node['refid'], []).append(node)

    def note_indirect_target(self, target):
        """
        Append `target` to `self.indirect_targets`; if it has any names,
        also record it with `note_refname()`.
        """
        self.indirect_targets.append(target)
        if target['names']:
            self.note_refname(target)

    def note_anonymous_target(self, target):
        """Assign an ID to `target` with `set_id()`."""
        self.set_id(target)

    def note_autofootnote(self, footnote):
        """Assign an ID to `footnote` and append it to
        `self.autofootnotes`."""
        self.set_id(footnote)
        self.autofootnotes.append(footnote)

    def note_autofootnote_ref(self, ref):
        """Assign an ID to `ref` and append it to
        `self.autofootnote_refs`."""
        self.set_id(ref)
        self.autofootnote_refs.append(ref)

    def note_symbol_footnote(self, footnote):
        """Assign an ID to `footnote` and append it to
        `self.symbol_footnotes`."""
        self.set_id(footnote)
        self.symbol_footnotes.append(footnote)

    def note_symbol_footnote_ref(self, ref):
        """Assign an ID to `ref` and append it to
        `self.symbol_footnote_refs`."""
        self.set_id(ref)
        self.symbol_footnote_refs.append(ref)

    def note_footnote(self, footnote):
        """Assign an ID to `footnote` and append it to `self.footnotes`."""
        self.set_id(footnote)
        self.footnotes.append(footnote)

    def note_footnote_ref(self, ref):
        """
        Assign an ID to `ref`, file it under ``ref['refname']`` in
        `self.footnote_refs`, and record it with `note_refname()`.
        """
        self.set_id(ref)
        self.footnote_refs.setdefault(ref['refname'], []).append(ref)
        self.note_refname(ref)

    def note_citation(self, citation):
        """
        Append `citation` to `self.citations`.

        Unlike the footnote methods, this does not call `set_id()`.
        """
        self.citations.append(citation)

    def note_citation_ref(self, ref):
        """
        Assign an ID to `ref`, file it under ``ref['refname']`` in
        `self.citation_refs`, and record it with `note_refname()`.
        """
        self.set_id(ref)
        self.citation_refs.setdefault(ref['refname'], []).append(ref)
        self.note_refname(ref)
1111
1112     def note_substitution_def(self, subdef, def_name, msgnode=None):
1113         name = whitespace_normalize_name(def_name)
1114         if name in self.substitution_defs:
1115             msg = self.reporter.error(
1116                   'Duplicate substitution definition name: "%s".' % name,
1117                   base_node=subdef)
1118             if msgnode != None:
1119                 msgnode += msg
1120             oldnode = self.substitution_defs[name]
1121             dupname(oldnode, name)
1122         # keep only the last definition:
1123         self.substitution_defs[name] = subdef
1124         # case-insensitive mapping:
1125         self.substitution_names[fully_normalize_name(name)] = name
1126
    def note_substitution_ref(self, subref, refname):
        """Set ``subref['refname']`` to
        ``whitespace_normalize_name(refname)``."""
        subref['refname'] = whitespace_normalize_name(refname)

    def note_pending(self, pending, priority=None):
        """Pass `pending` and `priority` on to
        `self.transformer.add_pending()`."""
        self.transformer.add_pending(pending, priority)

    def note_parse_message(self, message):
        """Append `message` to `self.parse_messages`."""
        self.parse_messages.append(message)

    def note_transform_message(self, message):
        """Append `message` to `self.transform_messages`."""
        self.transform_messages.append(message)
1138
1139     def note_source(self, source, offset):
1140         self.current_source = source
1141         if offset is None:
1142             self.current_line = offset
1143         else:
1144             self.current_line = offset + 1
1145
1146     def copy(self):
1147         return self.__class__(self.settings, self.reporter,
1148                               **self.attributes)
1149
1150     def get_decoration(self):
1151         if not self.decoration:
1152             self.decoration = decoration()
1153             index = self.first_child_not_matching_class(Titular)
1154             if index is None:
1155                 self.append(self.decoration)
1156             else:
1157                 self.insert(index, self.decoration)
1158         return self.decoration
1159
1160
1161 # ================
1162 #  Title Elements
1163 # ================
1164
# All three are `Titular` text elements; `title` and `subtitle` are also
# `PreBibliographic`, `rubric` is not.
class title(Titular, PreBibliographic, TextElement): pass
class subtitle(Titular, PreBibliographic, TextElement): pass
class rubric(Titular, TextElement): pass
1168
1169
1170 # ========================
1171 #  Bibliographic Elements
1172 # ========================
1173
# `docinfo` and `authors` are plain `Element` subclasses; `address` is a
# `FixedTextElement`; all other bibliographic elements are `TextElement`
# subclasses.
class docinfo(Bibliographic, Element): pass
class author(Bibliographic, TextElement): pass
class authors(Bibliographic, Element): pass
class organization(Bibliographic, TextElement): pass
class address(Bibliographic, FixedTextElement): pass
class contact(Bibliographic, TextElement): pass
class version(Bibliographic, TextElement): pass
class revision(Bibliographic, TextElement): pass
class status(Bibliographic, TextElement): pass
class date(Bibliographic, TextElement): pass
class copyright(Bibliographic, TextElement): pass
1185
1186
1187 # =====================
1188 #  Decorative Elements
1189 # =====================
1190
class decoration(Decorative, Element):

    """Element giving access to a `header` and a `footer` child."""

    def get_header(self):
        """
        Return the first child if it is a `header`; otherwise insert a new
        `header` at index 0 and return that.
        """
        kids = self.children
        if len(kids) == 0 or not isinstance(kids[0], header):
            self.insert(0, header())
        return self.children[0]

    def get_footer(self):
        """
        Return the last child if it is a `footer`; otherwise append a new
        `footer` and return that.
        """
        kids = self.children
        if len(kids) == 0 or not isinstance(kids[-1], footer):
            self.append(footer())
        return self.children[-1]


class header(Decorative, Element):
    pass


class footer(Decorative, Element):
    pass
1206
1207
1208 # =====================
1209 #  Structural Elements
1210 # =====================
1211
# `section` adds nothing to `Structural` and `Element`.
class section(Structural, Element): pass


class topic(Structural, Element):

    """
    Topics are terminal, "leaf" mini-sections, like block quotes with titles,
    or textual figures.  A topic is just like a section, except that it has no
    subsections, and it doesn't have to conform to section placement rules.

    Topics are allowed wherever body elements (list, table, etc.) are allowed,
    but only at the top level of a section or document.  Topics cannot nest
    inside topics, sidebars, or body elements; you can't have a topic inside a
    table, list, block quote, etc.
    """


class sidebar(Structural, Element):

    """
    Sidebars are like miniature, parallel documents that occur inside other
    documents, providing related or reference material.  A sidebar is
    typically offset by a border and "floats" to the side of the page; the
    document's main text may flow around it.  Sidebars can also be likened to
    super-footnotes; their content is outside of the flow of the document's
    main text.

    Sidebars are allowed wherever body elements (list, table, etc.) are
    allowed, but only at the top level of a section or document.  Sidebars
    cannot nest inside sidebars, topics, or body elements; you can't have a
    sidebar inside a table, list, block quote, etc.
    """


# `transition` adds nothing to `Structural` and `Element`.
class transition(Structural, Element): pass
1247
1248
1249 # ===============
1250 #  Body Elements
1251 # ===============
1252
# Declarative element classes: each one only combines a category class
# (`General`, `Sequential` or `Part`) with `Element` or `TextElement`.
class paragraph(General, TextElement): pass
class compound(General, Element): pass
class container(General, Element): pass
class bullet_list(Sequential, Element): pass
class enumerated_list(Sequential, Element): pass
class list_item(Part, Element): pass
class definition_list(Sequential, Element): pass
class definition_list_item(Part, Element): pass
class term(Part, TextElement): pass
class classifier(Part, TextElement): pass
class definition(Part, Element): pass
class field_list(Sequential, Element): pass
class field(Part, Element): pass
class field_name(Part, TextElement): pass
class field_body(Part, Element): pass
1268
1269
class option(Part, Element):

    # NOTE(review): presumably the string `Element.astext()` puts between
    # the text of child nodes -- confirm against `Element`.
    child_text_separator = ''


class option_argument(Part, TextElement):

    def astext(self):
        """
        Return the text content prefixed with the ``delimiter`` attribute
        (a single space if the attribute is not set).
        """
        return self.get('delimiter', ' ') + TextElement.astext(self)


class option_group(Part, Element):

    # See NOTE(review) on `option.child_text_separator`.
    child_text_separator = ', '


class option_list(Sequential, Element): pass


class option_list_item(Part, Element):

    # See NOTE(review) on `option.child_text_separator`.
    child_text_separator = '  '


class option_string(Part, TextElement): pass
class description(Part, Element): pass
class literal_block(General, FixedTextElement): pass
class doctest_block(General, FixedTextElement): pass
class line_block(General, Element): pass
1299
1300
class line(Part, TextElement):

    # Class-level default; NOTE(review): presumably set per instance by the
    # code that builds line blocks -- confirm against callers.
    indent = None


# Declarative element classes; the base classes determine each element's
# categories and whether it is a container (`Element`) or holds text
# (`TextElement` / `FixedTextElement`).
class block_quote(General, Element): pass
class attribution(Part, TextElement): pass
class attention(Admonition, Element): pass
class caution(Admonition, Element): pass
class danger(Admonition, Element): pass
class error(Admonition, Element): pass
class important(Admonition, Element): pass
class note(Admonition, Element): pass
class tip(Admonition, Element): pass
class hint(Admonition, Element): pass
class warning(Admonition, Element): pass
class admonition(Admonition, Element): pass
class comment(Special, Invisible, FixedTextElement): pass
class substitution_definition(Special, Invisible, TextElement): pass
class target(Special, Invisible, Inline, TextElement, Targetable): pass
class footnote(General, BackLinkable, Element, Labeled, Targetable): pass
class citation(General, BackLinkable, Element, Labeled, Targetable): pass
class label(Part, TextElement): pass
class figure(General, Element): pass
class caption(Part, TextElement): pass
class legend(Part, Element): pass
class table(General, Element): pass
class tgroup(Part, Element): pass
class colspec(Part, Element): pass
class thead(Part, Element): pass
class tbody(Part, Element): pass
class row(Part, Element): pass
class entry(Part, Element): pass
1334
1335
class system_message(Special, BackLinkable, PreBibliographic, Element):

    """
    System message element.

    Do not instantiate this class directly; use
    ``document.reporter.info/warning/error/severe()`` instead.
    """

    def __init__(self, message=None, *children, **attributes):
        """
        Create the element; a non-empty `message` is wrapped in a
        `paragraph` and becomes the first child, followed by `children`.

        Any exception raised by `Element.__init__()` is re-raised unchanged
        after a diagnostic line has been written to `sys.stderr`.
        """
        if message:
            p = paragraph('', message)
            children = (p,) + children
        try:
            Element.__init__(self, '', *children, **attributes)
        except:
            # Bare ``except`` on purpose: nothing is swallowed, the
            # exception is re-raised below.  The diagnostic goes to stderr
            # because stdout may carry the rendered document.
            sys.stderr.write('system_message: children=%r\n' % (children,))
            raise

    def astext(self):
        """
        Return "source:line: (type/level) text", built from the ``source``,
        ``line`` (empty if unset), ``type`` and ``level`` attributes and the
        element's text.
        """
        line = self.get('line', '')
        return u'%s:%s: (%s/%s) %s' % (self['source'], line, self['type'],
                                       self['level'], Element.astext(self))
1359
1360
class pending(Special, Invisible, Element):

    """
    Placeholder for a pending operation.

    A "pending" element encapsulates an operation (transform) that is to be
    applied later: the transform itself, the point in the document at which
    to apply it, and any data it needs.  Only the location is stored in the
    public document tree (by the "pending" object itself); the transform and
    its data live in the object's internal instance attributes.

    For example, say you want a table of contents in your reStructuredText
    document.  The easiest way to specify where to put it is from within the
    document, with a directive::

        .. contents::

    But the "contents" directive can't do its work until the entire document
    has been parsed and possibly transformed to some extent.  So the directive
    code leaves a placeholder behind that will trigger the second phase of its
    processing, something like this::

        <pending ...public attributes...> + internal attributes

    Use `document.note_pending()` so that the
    `docutils.transforms.Transformer` stage of processing can run all pending
    transforms.
    """

    def __init__(self, transform, details=None,
                 rawsource='', *children, **attributes):
        """
        Store `transform` and `details`; `rawsource`, `children` and
        `attributes` are passed on to `Element.__init__()`.  A false
        `details` value is replaced by a new empty dictionary.
        """
        Element.__init__(self, rawsource, *children, **attributes)

        self.transform = transform
        """The `docutils.transforms.Transform` class implementing the pending
        operation."""

        if not details:
            details = {}
        self.details = details
        """Detail data (dictionary) required by the pending operation."""

    def pformat(self, indent='    ', level=0):
        """
        Return `Element.pformat()` output followed by a listing of the
        internal attributes: the transform's module and name, and the items
        of `self.details` in sorted order.  Detail values that are nodes (or
        non-empty lists starting with a node) are shown via their own
        `pformat()` output.
        """
        listing = [
            '.. internal attributes:',
            '     .transform: %s.%s' % (self.transform.__module__,
                                        self.transform.__name__),
            '     .details:']
        items = list(self.details.items())
        items.sort()
        for key, value in items:
            if isinstance(value, Node):
                nodes = [value]
            elif value and isinstance(value, list) \
                  and isinstance(value[0], Node):
                nodes = value
            else:
                # Plain data: show its repr on the same line as the key.
                listing.append('%7s%s: %r' % ('', key, value))
                continue
            listing.append('%7s%s:' % ('', key))
            for child in nodes:
                for text in child.pformat().splitlines():
                    listing.append('%9s%s' % ('', text))
        margin = indent * level
        tail = ''.join([('    %s%s\n' % (margin, text)) for text in listing])
        return Element.pformat(self, indent, level) + tail

    def copy(self):
        """
        Return a new instance of this node's class with the same transform,
        details (the same object, not a copy), rawsource and attributes.
        """
        cls = self.__class__
        return cls(self.transform, self.details, self.rawsource,
                   **self.attributes)
1428
1429
class raw(Special, Inline, PreBibliographic, FixedTextElement):

    """
    Raw data that is to be passed untouched to the Writer.
    """

    pass


# =================
#  Inline Elements
# =================

# Declarative inline text elements; the three reference classes are also
# `Referential`, and `reference` is additionally `General`.
class emphasis(Inline, TextElement): pass
class strong(Inline, TextElement): pass
class literal(Inline, TextElement): pass
class reference(General, Inline, Referential, TextElement): pass
class footnote_reference(Inline, Referential, TextElement): pass
class citation_reference(Inline, Referential, TextElement): pass
class substitution_reference(Inline, TextElement): pass
class title_reference(Inline, TextElement): pass
class abbreviation(Inline, TextElement): pass
class acronym(Inline, TextElement): pass
class superscript(Inline, TextElement): pass
class subscript(Inline, TextElement): pass


class image(General, Inline, Element):

    def astext(self):
        """Return the ``alt`` attribute, or an empty string if unset."""
        return self.get('alt', '')


class inline(Inline, TextElement): pass
class problematic(Inline, TextElement): pass
class generated(Inline, TextElement): pass
1466
1467
1468 # ========================================
1469 #  Auxiliary Classes, Functions, and Data
1470 # ========================================
1471
# Whitespace-separated names, split into a list at import time.  The list is
# passed to `_add_node_class_names()` further down in this module, which
# generates the per-class ``visit_...``/``depart_...`` methods of
# `GenericNodeVisitor` and `SparseNodeVisitor` from it.
node_class_names = """
    Text
    abbreviation acronym address admonition attention attribution author
        authors
    block_quote bullet_list
    caption caution citation citation_reference classifier colspec comment
        compound contact container copyright
    danger date decoration definition definition_list definition_list_item
        description docinfo doctest_block document
    emphasis entry enumerated_list error
    field field_body field_list field_name figure footer
        footnote footnote_reference
    generated
    header hint
    image important inline
    label legend line line_block list_item literal literal_block
    note
    option option_argument option_group option_list option_list_item
        option_string organization
    paragraph pending problematic
    raw reference revision row rubric
    section sidebar status strong subscript substitution_definition
        substitution_reference subtitle superscript system_message
    table target tbody term tgroup thead tip title title_reference topic
        transition
    version
    warning""".split()
"""A list of names of all concrete Node subclasses."""
1500
1501
class NodeVisitor:

    """
    "Visitor" pattern [GoF95]_ abstract superclass implementation for
    document tree traversals.

    `Node.walk()` calls `dispatch_visit()` upon entering a node;
    `Node.walkabout()` also calls `dispatch_departure()` before exiting a
    node.  The dispatch methods call "``visit_`` + node class name" or
    "``depart_`` + node class name", resp.  If no such method exists,
    `unknown_visit()` or `unknown_departure()` is called instead.

    This is a base class for visitors whose ``visit_...`` & ``depart_...``
    methods should be implemented for *all* node types encountered (such as
    for `docutils.writers.Writer` subclasses).  Unimplemented methods will
    raise exceptions.

    For sparse traversals, where only certain node types are of interest,
    subclass `SparseNodeVisitor` instead.  When (mostly or entirely) uniform
    processing is desired, subclass `GenericNodeVisitor`.

    .. [GoF95] Gamma, Helm, Johnson, Vlissides. *Design Patterns: Elements of
       Reusable Object-Oriented Software*. Addison-Wesley, Reading, MA, USA,
       1995.
    """

    optional = ()
    """
    Tuple containing node class names (as strings).

    No exception will be raised if writers do not implement visit
    or departure functions for these node classes.

    Used to ensure transitional compatibility with existing 3rd-party writers.
    """

    def __init__(self, document):
        """Remember the `document` being traversed."""
        self.document = document

    def dispatch_visit(self, node):
        """
        Look up "``visit_`` + node class name" on this visitor, falling back
        to `unknown_visit`; report the choice through the document's
        reporter at debug level, then call it with `node` and return the
        result.
        """
        class_name = node.__class__.__name__
        handler = getattr(self, 'visit_' + class_name, self.unknown_visit)
        message = (
            'docutils.nodes.NodeVisitor.dispatch_visit calling %s for %s'
            % (handler.__name__, class_name))
        self.document.reporter.debug(message)
        return handler(node)

    def dispatch_departure(self, node):
        """
        Look up "``depart_`` + node class name" on this visitor, falling
        back to `unknown_departure`; report the choice through the
        document's reporter at debug level, then call it with `node` and
        return the result.
        """
        class_name = node.__class__.__name__
        handler = getattr(self, 'depart_' + class_name,
                          self.unknown_departure)
        message = (
            'docutils.nodes.NodeVisitor.dispatch_departure calling %s for %s'
            % (handler.__name__, class_name))
        self.document.reporter.debug(message)
        return handler(node)

    def unknown_visit(self, node):
        """
        Called when entering unknown `Node` types.

        Raise `NotImplementedError`, unless the ``strict_visitor`` setting
        is false and the node's class name is listed in `optional`.
        """
        class_name = node.__class__.__name__
        if (not node.document.settings.strict_visitor
                and class_name in self.optional):
            return
        raise NotImplementedError(
            '%s visiting unknown node type: %s'
            % (self.__class__, class_name))

    def unknown_departure(self, node):
        """
        Called before exiting unknown `Node` types.

        Raise `NotImplementedError`, unless the ``strict_visitor`` setting
        is false and the node's class name is listed in `optional`.
        """
        class_name = node.__class__.__name__
        if (not node.document.settings.strict_visitor
                and class_name in self.optional):
            return
        raise NotImplementedError(
            '%s departing unknown node type: %s'
            % (self.__class__, class_name))
1593
1594
class SparseNodeVisitor(NodeVisitor):

    """
    Base class for sparse traversals, where only certain node types are of
    interest.  When ``visit_...`` & ``depart_...`` methods should be
    implemented for *all* node types (such as for `docutils.writers.Writer`
    subclasses), subclass `NodeVisitor` instead.
    """

    # The do-nothing ``visit_...``/``depart_...`` methods are not written
    # out here; `_add_node_class_names()` attaches them to this class.


class GenericNodeVisitor(NodeVisitor):

    """
    Generic "Visitor" abstract superclass, for simple traversals.

    Unless overridden, each ``visit_...`` method calls `default_visit()`, and
    each ``depart_...`` method (when using `Node.walkabout()`) calls
    `default_departure()`. `default_visit()` (and `default_departure()`) must
    be overridden in subclasses.

    Define fully generic visitors by overriding `default_visit()` (and
    `default_departure()`) only. Define semi-generic visitors by overriding
    individual ``visit_...()`` (and ``depart_...()``) methods also.

    `NodeVisitor.unknown_visit()` (`NodeVisitor.unknown_departure()`) should
    be overridden for default behavior.
    """

    # The forwarding ``visit_...``/``depart_...`` methods are not written
    # out here; `_add_node_class_names()` attaches them to this class.

    def default_visit(self, node):
        """Override for generic, uniform traversals."""
        raise NotImplementedError

    def default_departure(self, node):
        """Override for generic, uniform traversals."""
        raise NotImplementedError
1630
def _call_default_visit(self, node):
    """Generated ``visit_...`` method body: forward to `default_visit()`."""
    self.default_visit(node)

def _call_default_departure(self, node):
    """Generated ``depart_...`` method body: forward to
    `default_departure()`."""
    self.default_departure(node)

def _nop(self, node):
    """Generated method body that does nothing."""
    pass

def _add_node_class_names(names):
    """
    Save typing with dynamic assignments: for every name in `names`, attach
    ``visit_<name>`` and ``depart_<name>`` methods to `GenericNodeVisitor`
    (forwarding to the default methods) and to `SparseNodeVisitor`
    (no-ops).
    """
    generated = (('visit_', _call_default_visit),
                 ('depart_', _call_default_departure))
    for _name in names:
        for prefix, forwarder in generated:
            setattr(GenericNodeVisitor, prefix + _name, forwarder)
            setattr(SparseNodeVisitor, prefix + _name, _nop)

_add_node_class_names(node_class_names)
1649
1650
class TreeCopyVisitor(GenericNodeVisitor):

    """
    Visitor that builds a complete copy of a tree or branch, element
    attributes included.
    """

    def __init__(self, document):
        GenericNodeVisitor.__init__(self, document)
        # The acting parent starts out as a plain list; the copy of the
        # first node visited is appended to it.
        self.parent = []
        # Acting parents that were set aside while their children are
        # being visited.
        self.parent_stack = []

    def get_tree_copy(self):
        """Return the first item collected in the acting parent."""
        tree_copy = self.parent[0]
        return tree_copy

    def default_visit(self, node):
        """Append a copy of `node` to the acting parent and descend into it."""
        clone = node.copy()
        acting_parent = self.parent
        acting_parent.append(clone)
        self.parent_stack.append(acting_parent)
        # Children visited next are appended to the copy.
        self.parent = clone

    def default_departure(self, node):
        """Make the most recently set-aside parent the acting parent again."""
        previous_parent = self.parent_stack.pop()
        self.parent = previous_parent
1675
1676
class TreePruningException(Exception):

    """
    Common ancestor of the `NodeVisitor`-related tree pruning exceptions.

    A ``visit_...`` or ``depart_...`` method that is called during a
    `Node.walk()` or `Node.walkabout()` tree traversal raises one of the
    subclasses in order to prune the tree traversed.
    """
1688
1689
class SkipChildren(TreePruningException):

    """
    Skip all children of the current node.  Neither the siblings of the
    current node nor its ``depart_...`` method are affected.
    """
1698
1699
class SkipSiblings(TreePruningException):

    """
    Skip the remaining siblings (those to the right) of the current node.
    Neither the children of the current node nor its ``depart_...`` method
    are affected.
    """
1708
1709
class SkipNode(TreePruningException):

    """
    Skip the children of the current node and also skip the call to the
    current node's ``depart_...`` method.
    """
1718
1719
class SkipDeparture(TreePruningException):

    """
    Skip the call to the current node's ``depart_...`` method.  Neither the
    children nor the siblings of the current node are affected.
    """
1728
1729
class NodeFound(TreePruningException):

    """
    Signal that the target of a search has been found.  The traversal code
    does not catch this exception; the client must catch it.
    """
1739
1740
class StopTraversal(TreePruningException):

    """
    Stop the traversal altogether.  The current node's ``depart_...`` method
    is not affected, and the parent nodes' ``depart_...`` methods are also
    called as usual.  No other nodes are visited.  Use this instead of
    `NodeFound` when exception handling should not trickle up to the caller.
    """
1752
1753
def make_id(string):
    """
    Convert `string` into an identifier and return it.

    `string` may be a byte string or a unicode string; a byte string is
    decoded with the default encoding.  The result is returned as ``str``.

    Docutils identifiers will conform to the regular expression
    ``[a-z](-?[a-z0-9]+)*``.  For CSS compatibility, identifiers (the "class"
    and "id" attributes) should have no underscores, colons, or periods.
    Hyphens may be used.

    - The `HTML 4.01 spec`_ defines identifiers based on SGML tokens:

          ID and NAME tokens must begin with a letter ([A-Za-z]) and may be
          followed by any number of letters, digits ([0-9]), hyphens ("-"),
          underscores ("_"), colons (":"), and periods (".").

    - However the `CSS1 spec`_ defines identifiers based on the "name" token,
      a tighter interpretation ("flex" tokenizer notation; "latin1" and
      "escape" 8-bit characters have been replaced with entities)::

          unicode     \\[0-9a-f]{1,4}
          latin1      [&iexcl;-&yuml;]
          escape      {unicode}|\\[ -~&iexcl;-&yuml;]
          nmchar      [-a-z0-9]|{latin1}|{escape}
          name        {nmchar}+

    The CSS1 "nmchar" rule does not include underscores ("_"), colons (":"),
    or periods ("."), therefore "class" and "id" attributes should not contain
    these characters. They should be replaced with hyphens ("-"). Combined
    with HTML's requirements (the first character must be a letter; no
    "unicode", "latin1", or "escape" characters), this results in the
    ``[a-z](-?[a-z0-9]+)*`` pattern.

    .. _HTML 4.01 spec: http://www.w3.org/TR/html401
    .. _CSS1 spec: http://www.w3.org/TR/REC-CSS1
    """
    # The local is deliberately not called `id`, to avoid shadowing the
    # builtin of that name.
    ident = string.lower()
    if not isinstance(ident, unicode):
        ident = ident.decode()
    try:
        ident = ident.translate(_non_id_translate_digraphs)
    except NotImplementedError:
        # unicode.translate(dict) does not support 1-n-mappings in Python 2.2
        pass
    ident = ident.translate(_non_id_translate)
    try:
        # Decompose the remaining characters and drop everything that is
        # not ASCII.  The codec name is spelled in lowercase on purpose:
        # an uppercase name has to be case-folded during codec lookup,
        # which reportedly can fail under a Turkish locale (I -> dotless
        # i) -- NOTE(review): confirm against the targeted Python versions.
        ident = unicodedata.normalize('NFKD', ident).encode('ascii', 'ignore')
    except AttributeError:
        # unicodedata.normalize not supported in Python 2.2
        pass
    # shrink runs of whitespace and replace by hyphen
    ident = _non_id_chars.sub('-', ' '.join(ident.split()))
    # strip leading hyphens/digits and trailing hyphens
    ident = _non_id_at_ends.sub('', ident)
    return str(ident)
1807
# Matches a run of characters other than lowercase ASCII letters and digits;
# `make_id()` replaces every such run with a single hyphen.
_non_id_chars = re.compile('[^a-z0-9]+')
# Matches leading hyphens/digits or trailing hyphens; `make_id()` removes
# these matches.
_non_id_at_ends = re.compile('^[-0-9]+|-+$')
# Translation table (Unicode code point -> single ASCII letter) that
# `make_id()` applies with ``translate()`` before NFKD normalization.
_non_id_translate = {
    0x00f8: u'o',       # o with stroke
    0x0111: u'd',       # d with stroke
    0x0127: u'h',       # h with stroke
    0x0131: u'i',       # dotless i
    0x0142: u'l',       # l with stroke
    0x0167: u't',       # t with stroke
    0x0180: u'b',       # b with stroke
    0x0183: u'b',       # b with topbar
    0x0188: u'c',       # c with hook
    0x018c: u'd',       # d with topbar
    0x0192: u'f',       # f with hook
    0x0199: u'k',       # k with hook
    0x019a: u'l',       # l with bar
    0x019e: u'n',       # n with long right leg
    0x01a5: u'p',       # p with hook
    0x01ab: u't',       # t with palatal hook
    0x01ad: u't',       # t with hook
    0x01b4: u'y',       # y with hook
    0x01b6: u'z',       # z with stroke
    0x01e5: u'g',       # g with stroke
    0x0225: u'z',       # z with hook
    0x0234: u'l',       # l with curl
    0x0235: u'n',       # n with curl
    0x0236: u't',       # t with curl
    0x0237: u'j',       # dotless j
    0x023c: u'c',       # c with stroke
    0x023f: u's',       # s with swash tail
    0x0240: u'z',       # z with swash tail
    0x0247: u'e',       # e with stroke
    0x0249: u'j',       # j with stroke
    0x024b: u'q',       # q with hook tail
    0x024d: u'r',       # r with stroke
    0x024f: u'y',       # y with stroke
}
# Translation table (Unicode code point -> two ASCII letters) that
# `make_id()` applies first; kept separate from the 1-to-1 table above
# because `make_id()` tolerates a failure of this 1-to-n translation.
_non_id_translate_digraphs = {
    0x00df: u'sz',      # ligature sz
    0x00e6: u'ae',      # ae
    0x0153: u'oe',      # ligature oe
    0x0238: u'db',      # db digraph
    0x0239: u'qp',      # qp digraph
}
1852
def dupname(node, name):
    """
    Move `name` from ``node['names']`` to ``node['dupnames']``.

    The node is also flagged as referenced.
    """
    duplicate_names = node['dupnames']
    duplicate_names.append(name)
    active_names = node['names']
    active_names.remove(name)
    # Pretend the node is referenced, even though it isn't, so that no
    # unnecessary system_messages are generated.
    node.referenced = 1
1859
def fully_normalize_name(name):
    """
    Return `name` in lowercase with whitespace normalized.

    Leading and trailing whitespace is dropped and every internal run of
    whitespace becomes a single space.
    """
    words = name.lower().split()
    return ' '.join(words)
1863
def whitespace_normalize_name(name):
    """
    Return `name` with whitespace normalized; case is left untouched.

    Leading and trailing whitespace is dropped and every internal run of
    whitespace becomes a single space.
    """
    words = name.split()
    return ' '.join(words)
1867
def serial_escape(value):
    """
    Escape a string that is an element of a list, for serialization.

    Backslashes are doubled first; afterwards every space is prefixed with
    a backslash.
    """
    backslashes_doubled = value.replace('\\', r'\\')
    return backslashes_doubled.replace(' ', r'\ ')
1871
1872 # \f
1873 #
1874 # Local Variables:
1875 # indent-tabs-mode: nil
1876 # sentence-end-double-space: t
1877 # fill-column: 78
1878 # End: