docutils/writers/html_plain/__init__.py

   1 # .. coding: utf8
   2 # :Author: Günter Milde <milde@users.berlios.de>
   3 # :Revision: $Revision$
   4 # :Date: $Date: 2005-06-28$
   5 # :Copyright: © 2005, 2009 Günter Milde.
   6 # :License: Released under the terms of the `2-Clause BSD license`_, in short:
   7 #
   8 #    Copying and distribution of this file, with or without modification,
   9 #    are permitted in any medium without royalty provided the copyright
  10 #    notice and this notice are preserved.
  11 #    This file is offered as-is, without any warranty.
  12 #
  13 # .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause
  14
  15 # Use "best practice" as recommended by the W3C:
  16 # http://www.w3.org/2009/cheatsheet/
  17
  18
  19 """
  20 Basic HyperText Markup Language document tree Writer.
  21
  22 The output conforms to the `HTML 5` specification as well as
  23 to `XHTML 1.0 transitional`.
  24
  25 The cascading style sheet "minimal.css" is required for proper viewing,
  26 the style sheet "plain.css" provides a better reading experience.
  27 """
  28 __docformat__ = 'reStructuredText'
  29
  30 import sys
  31 import os
  32 import os.path
  33 import re
  34 import urllib
  35 try: # check for the Python Imaging Library
  36     import PIL.Image
  37 except ImportError:
  38     try:  # sometimes PIL modules are put in PYTHONPATH's root
  39         import Image
  40         class PIL(object): pass  # dummy wrapper
  41         PIL.Image = Image
  42     except ImportError:
  43         PIL = None
  44 import docutils
  45 from docutils import frontend, nodes, utils, writers, languages, io
  46 from docutils.utils.error_reporting import SafeString
  47 from docutils.transforms import writer_aux
  48 from docutils.utils.math import (unichar2tex, pick_math_environment,
  49                                  math2html, latex2mathml, tex2mathml_extern)
  50
  51 class Writer(writers.Writer):
  52
  53     supported = ('html', 'html5', 'html4', 'xhtml', 'xhtml10')
  54     """Formats this writer supports."""
  55
  56     default_stylesheets = ['minimal.css','plain.css']
  57     default_stylesheet_dirs = ['.', os.path.abspath(os.path.dirname(__file__))]
  58
  59     default_template = 'template.txt'
  60     default_template_path = os.path.join(
  61         os.path.dirname(os.path.abspath(__file__)), default_template)
  62
  63     settings_spec = (
  64         'HTML-Specific Options',
  65         None,
  66         (('Specify the template file (UTF-8 encoded).  Default is "%s".'
  67           % default_template_path,
  68           ['--template'],
  69           {'default': default_template_path, 'metavar': '<file>'}),
  70          ('Comma separated list of stylesheet URLs. '
  71           'Overrides previous --stylesheet and --stylesheet-path settings.',
  72           ['--stylesheet'],
  73           {'metavar': '<URL[,URL,...]>', 'overrides': 'stylesheet_path',
  74            'validator': frontend.validate_comma_separated_list}),
  75          ('Comma separated list of stylesheet paths. '
  76           'Relative paths are expanded if a matching file is found in '
  77           'the --stylesheet-dirs. With --link-stylesheet, '
  78           'the path is rewritten relative to the output HTML file. '
  79           'Default: "%s"' % ','.join(default_stylesheets),
  80           ['--stylesheet-path'],
  81           {'metavar': '<file[,file,...]>', 'overrides': 'stylesheet',
  82            'validator': frontend.validate_comma_separated_list,
  83            'default': default_stylesheets}),
  84          ('Embed the stylesheet(s) in the output HTML file.  The stylesheet '
  85           'files must be accessible during processing. This is the default.',
  86           ['--embed-stylesheet'],
  87           {'default': 1, 'action': 'store_true',
  88            'validator': frontend.validate_boolean}),
  89          ('Link to the stylesheet(s) in the output HTML file. '
  90           'Default: embed stylesheets.',
  91           ['--link-stylesheet'],
  92           {'dest': 'embed_stylesheet', 'action': 'store_false'}),
  93          ('Comma-separated list of directories where stylesheets are found. '
  94           'Used by --stylesheet-path when expanding relative path arguments. '
  95           'Default: "%s"' % default_stylesheet_dirs,
  96           ['--stylesheet-dirs'],
  97           {'metavar': '<dir[,dir,...]>',
  98            'validator': frontend.validate_comma_separated_list,
  99            'default': default_stylesheet_dirs}),
 100          ('Specify the initial header level.  Default is 1 for "<h1>".  '
 101           'Does not affect document title & subtitle (see --no-doc-title).',
 102           ['--initial-header-level'],
 103           {'choices': '1 2 3 4 5 6'.split(), 'default': '1',
 104            'metavar': '<level>'}),
 105          ('Format for footnote references: one of "superscript" or '
 106           '"brackets".  Default is "brackets".',
 107           ['--footnote-references'],
 108           {'choices': ['superscript', 'brackets'], 'default': 'brackets',
 109            'metavar': '<format>',
 110            'overrides': 'trim_footnote_reference_space'}),
 111          ('Format for block quote attributions: one of "dash" (em-dash '
 112           'prefix), "parentheses"/"parens", or "none".  Default is "dash".',
 113           ['--attribution'],
 114           {'choices': ['dash', 'parentheses', 'parens', 'none'],
 115            'default': 'dash', 'metavar': '<format>'}),
 116          ('Remove extra vertical whitespace between items of "simple" bullet '
 117           'lists and enumerated lists.  Default: enabled.',
 118           ['--compact-lists'],
 119           {'default': True, 'action': 'store_true',
 120            'validator': frontend.validate_boolean}),
 121          ('Disable compact simple bullet and enumerated lists.',
 122           ['--no-compact-lists'],
 123           {'dest': 'compact_lists', 'action': 'store_false'}),
 124          ('Remove extra vertical whitespace between items of simple field '
 125           'lists.  Default: enabled.',
 126           ['--compact-field-lists'],
 127           {'default': True, 'action': 'store_true',
 128            'validator': frontend.validate_boolean}),
 129          ('Disable compact simple field lists.',
 130           ['--no-compact-field-lists'],
 131           {'dest': 'compact_field_lists', 'action': 'store_false'}),
 132          ('Added to standard table classes. '
 133           'Defined styles: "borderless". Default: ""',
 134           ['--table-style'],
 135           {'default': ''}),
 136          ('Math output format (one of "MathML", "HTML", "MathJax", '
 137           'or "LaTeX") and option(s). '
 138           'Default: "HTML math.css"',
 139           ['--math-output'],
 140           {'default': 'HTML math.css'}),
 141          ('Prepend an XML declaration. (Thwarts HTML5 conformance.) '
 142           'Default: False',
 143           ['--xml-declaration'],
 144           {'default': False, 'action': 'store_true',
 145            'validator': frontend.validate_boolean}),
 146          ('Omit the XML declaration.',
 147           ['--no-xml-declaration'],
 148           {'dest': 'xml_declaration', 'action': 'store_false'}),
 149          ('Obfuscate email addresses to confuse harvesters while still '
 150           'keeping email links usable with standards-compliant browsers.',
 151           ['--cloak-email-addresses'],
 152           {'action': 'store_true', 'validator': frontend.validate_boolean}),))
 153
 154     settings_defaults = {'output_encoding_error_handler': 'xmlcharrefreplace'}
 155
 156     config_section = 'html-base writer'
 157     config_section_dependencies = ('writers',)
 158
 159     visitor_attributes = (
 160         'head_prefix', 'head', 'stylesheet', 'body_prefix',
 161         'body_pre_docinfo', 'docinfo', 'body', 'body_suffix',
 162         'title', 'subtitle', 'header', 'footer', 'meta', 'fragment',
 163         'html_prolog', 'html_head', 'html_title', 'html_subtitle',
 164         'html_body')
 165
 166     def get_transforms(self):
 167         return writers.Writer.get_transforms(self) + [writer_aux.Admonitions]
 168
 169     def __init__(self):
 170         writers.Writer.__init__(self)
 171         self.translator_class = HTMLTranslator
 172
 173     def translate(self):
 174         self.visitor = visitor = self.translator_class(self.document)
 175         self.document.walkabout(visitor)
 176         for attr in self.visitor_attributes:
 177             setattr(self, attr, getattr(visitor, attr))
 178         self.output = self.apply_template()
 179
 180     def apply_template(self):
 181         template_file = open(self.document.settings.template, 'rb')
 182         template = unicode(template_file.read(), 'utf-8')
 183         template_file.close()
 184         subs = self.interpolation_dict()
 185         return template % subs
 186
 187     def interpolation_dict(self):
 188         subs = {}
 189         settings = self.document.settings
 190         for attr in self.visitor_attributes:
 191             subs[attr] = ''.join(getattr(self, attr)).rstrip('\n')
 192         subs['encoding'] = settings.output_encoding
 193         subs['version'] = docutils.__version__
 194         return subs
 195
 196     def assemble_parts(self):
 197         writers.Writer.assemble_parts(self)
 198         for part in self.visitor_attributes:
 199             self.parts[part] = ''.join(getattr(self, part))
 200
 201
 202 class HTMLTranslator(nodes.NodeVisitor):
 203
 204     """
    This writer generates `polyglot markup`: HTML 5 that is also valid XML.
 206     """
 207
 208     xml_declaration = '<?xml version="1.0" encoding="%s" ?>\n'
 209     doctype = (
 210         '<!DOCTYPE html>\n')
 211     doctype_mathml = doctype
 212
 213     head_prefix_template = ('<html xmlns="http://www.w3.org/1999/xhtml"'
 214                             ' xml:lang="%(lang)s" lang="%(lang)s">\n<head>\n')
 215     content_type = ('<meta http-equiv="Content-Type"'
 216                     ' content="text/html; charset=%s" />\n')
 217     content_type_xml = ('<meta http-equiv="Content-Type"'
 218                         ' content="application/xhtml+xml; charset=%s" />\n')
 219
 220     generator = ('<meta name="generator" content="Docutils %s: '
 221                  'http://docutils.sourceforge.net/" />\n')
 222
 223     # Template for the MathJax script in the header:
 224     mathjax_script = '<script type="text/javascript" src="%s"></script>\n'
 225     # The latest version of MathJax from the distributed server:
 226     # avaliable to the public under the `MathJax CDN Terms of Service`__
 227     # __http://www.mathjax.org/download/mathjax-cdn-terms-of-service/
 228     mathjax_url = ('https://cdn.mathjax.org/mathjax/latest/MathJax.js?'
 229                    'config=TeX-AMS_CHTML')
 230     # may be overwritten by custom URL appended to "mathjax"
 231
 232     stylesheet_link = '<link rel="stylesheet" href="%s" type="text/css" />\n'
 233     embedded_stylesheet = '<style type="text/css">\n\n%s\n</style>\n'
 234     words_and_spaces = re.compile(r'\S+| +|\n')
 235     sollbruchstelle = re.compile(r'.+\W\W.+|[-?].+', re.U) # wrap point inside word
 236     lang_attribute = 'lang' # name changes to 'xml:lang' in XHTML 1.1
 237
 238     def __init__(self, document):
 239         nodes.NodeVisitor.__init__(self, document)
 240         self.settings = settings = document.settings
 241         lcode = settings.language_code
 242         self.language = languages.get_language(lcode, document.reporter)
 243         self.meta = [self.generator % docutils.__version__]
 244         self.head_prefix = []
 245         self.html_prolog = []
 246         if settings.xml_declaration:
 247             self.head_prefix.append(self.xml_declaration
 248                                     % settings.output_encoding)
 249             self.content_type = self.content_type_xml
 250             # encoding not interpolated:
 251             self.html_prolog.append(self.xml_declaration)
 252         self.head = self.meta[:]
 253         self.stylesheet = [self.stylesheet_call(path)
 254                            for path in utils.get_stylesheet_list(settings)]
 255         self.body_prefix = ['</head>\n<body>\n']
 256         # document title, subtitle display
 257         self.body_pre_docinfo = []
 258         # author, date, etc.
 259         self.docinfo = []
 260         self.body = []
 261         self.fragment = []
 262         self.body_suffix = ['</body>\n</html>\n']
 263         self.section_level = 0
 264         self.initial_header_level = int(settings.initial_header_level)
 265
 266         self.math_output = settings.math_output.split()
 267         self.math_output_options = self.math_output[1:]
 268         self.math_output = self.math_output[0].lower()
 269
 270         # A heterogenous stack used in conjunction with the tree traversal.
 271         # Make sure that the pops correspond to the pushes:
 272         self.context = []
 273
 274         self.topic_classes = [] # TODO: replace with self_in_contents
 275         self.colspecs = []
 276         self.compact_p = True
 277         self.compact_simple = False
 278         self.compact_field_list = False
 279         self.in_docinfo = False
 280         self.in_sidebar = False
 281         self.in_footnote_list = False
 282         self.title = []
 283         self.subtitle = []
 284         self.header = []
 285         self.footer = []
 286         self.html_head = [self.content_type] # charset not interpolated
 287         self.html_title = []
 288         self.html_subtitle = []
 289         self.html_body = []
 290         self.in_document_title = 0   # len(self.body) or 0
 291         self.in_mailto = False
 292         self.author_in_authors = False
 293         self.math_header = []
 294
 295     def astext(self):
 296         return ''.join(self.head_prefix + self.head
 297                        + self.stylesheet + self.body_prefix
 298                        + self.body_pre_docinfo + self.docinfo
 299                        + self.body + self.body_suffix)
 300
 301     def encode(self, text):
 302         """Encode special characters in `text` & return."""
 303         # Use only named entities known in both XML and HTML
 304         # other characters are automatically encoded "by number" if required.
 305         text = unicode(text)
 306         return text.translate({
 307             ord('&'): u'&amp;',
 308             ord('<'): u'&lt;',
 309             ord('"'): u'&quot;',
 310             ord('>'): u'&gt;',
 311             ord('@'): u'&#64;', # may thwart some address harvesters
 312                               })
 313
 314     def cloak_mailto(self, uri):
 315         """Try to hide a mailto: URL from harvesters."""
 316         # Encode "@" using a URL octet reference (see RFC 1738).
 317         # Further cloaking with HTML entities will be done in the
 318         # `attval` function.
 319         return uri.replace('@', '%40')
 320
 321     def cloak_email(self, addr):
 322         """Try to hide the link text of a email link from harversters."""
 323         # Surround at-signs and periods with <span> tags.  ("@" has
 324         # already been encoded to "&#64;" by the `encode` method.)
 325         addr = addr.replace('&#64;', '<span>&#64;</span>')
 326         addr = addr.replace('.', '<span>&#46;</span>')
 327         return addr
 328
 329     def attval(self, text,
 330                whitespace=re.compile('[\n\r\t\v\f]')):
 331         """Cleanse, HTML encode, and return attribute value text."""
 332         encoded = self.encode(whitespace.sub(' ', text))
 333         if self.in_mailto and self.settings.cloak_email_addresses:
 334             # Cloak at-signs ("%40") and periods with HTML entities.
 335             encoded = encoded.replace('%40', '&#37;&#52;&#48;')
 336             encoded = encoded.replace('.', '&#46;')
 337         return encoded
 338
 339     def stylesheet_call(self, path):
 340         """Return code to reference or embed stylesheet file `path`"""
 341         if self.settings.embed_stylesheet:
 342             try:
 343                 content = io.FileInput(source_path=path,
 344                                        encoding='utf-8').read()
 345                 self.settings.record_dependencies.add(path)
 346             except IOError, err:
 347                 msg = u"Cannot embed stylesheet '%s': %s." % (
 348                                 path, SafeString(err.strerror))
 349                 self.document.reporter.error(msg)
 350                 return '<--- %s --->\n' % msg
 351             return self.embedded_stylesheet % content
 352         # else link to style file:
 353         if self.settings.stylesheet_path:
 354             # adapt path relative to output (cf. config.html#stylesheet-path)
 355             path = utils.relative_path(self.settings._destination, path)
 356         return self.stylesheet_link % self.encode(path)
 357
 358     def starttag(self, node, tagname, suffix='\n', empty=False, **attributes):
 359         """
 360         Construct and return a start tag given a node (id & class attributes
 361         are extracted), tag name, and optional attributes.
 362         """
 363         tagname = tagname.lower()
 364         prefix = []
 365         atts = {}
 366         ids = []
 367         for (name, value) in attributes.items():
 368             atts[name.lower()] = value
 369         classes = []
 370         languages = []
 371         # unify class arguments and move language specification
 372         for cls in node.get('classes', []) + atts.pop('class', '').split() :
 373             if cls.startswith('language-'):
 374                 languages.append(cls[9:])
 375             elif cls.strip() and cls not in classes:
 376                 classes.append(cls)
 377         if languages:
 378             # attribute name is 'lang' in XHTML 1.0 but 'xml:lang' in 1.1
 379             atts[self.lang_attribute] = languages[0]
 380         if classes:
 381             atts['class'] = ' '.join(classes)
 382         assert 'id' not in atts
 383         ids.extend(node.get('ids', []))
 384         if 'ids' in atts:
 385             ids.extend(atts['ids'])
 386             del atts['ids']
 387         if ids:
 388             atts['id'] = ids[0]
 389             for id in ids[1:]:
 390                 # Add empty "span" elements for additional IDs.  Note
 391                 # that we cannot use empty "a" elements because there
 392                 # may be targets inside of references, but nested "a"
 393                 # elements aren't allowed in XHTML (even if they do
 394                 # not all have a "href" attribute).
 395                 if empty:
 396                     # Empty tag.  Insert target right in front of element.
 397                     prefix.append('<span id="%s"></span>' % id)
 398                 else:
 399                     # Non-empty tag.  Place the auxiliary <span> tag
 400                     # *inside* the element, as the first child.
 401                     suffix += '<span id="%s"></span>' % id
 402         attlist = atts.items()
 403         attlist.sort()
 404         parts = [tagname]
 405         for name, value in attlist:
 406             # value=None was used for boolean attributes without
 407             # value, but this isn't supported by XHTML.
 408             assert value is not None
 409             if isinstance(value, list):
 410                 values = [unicode(v) for v in value]
 411                 parts.append('%s="%s"' % (name.lower(),
 412                                           self.attval(' '.join(values))))
 413             else:
 414                 parts.append('%s="%s"' % (name.lower(),
 415                                           self.attval(unicode(value))))
 416         if empty:
 417             infix = ' /'
 418         else:
 419             infix = ''
 420         return ''.join(prefix) + '<%s%s>' % (' '.join(parts), infix) + suffix
 421
 422     def emptytag(self, node, tagname, suffix='\n', **attributes):
 423         """Construct and return an XML-compatible empty tag."""
 424         return self.starttag(node, tagname, suffix, empty=True, **attributes)
 425
 426     def set_class_on_child(self, node, class_, index=0):
 427         """
 428         Set class `class_` on the visible child no. index of `node`.
 429         Do nothing if node has fewer children than `index`.
 430         """
 431         children = [n for n in node if not isinstance(n, nodes.Invisible)]
 432         try:
 433             child = children[index]
 434         except IndexError:
 435             return
 436         child['classes'].append(class_)
 437
 438     def visit_Text(self, node):
 439         text = node.astext()
 440         encoded = self.encode(text)
 441         if self.in_mailto and self.settings.cloak_email_addresses:
 442             encoded = self.cloak_email(encoded)
 443         self.body.append(encoded)
 444
 445     def depart_Text(self, node):
 446         pass
 447
 448     def visit_abbreviation(self, node):
 449         # @@@ implementation incomplete ("title" attribute)
 450         self.body.append(self.starttag(node, 'abbr', ''))
 451
 452     def depart_abbreviation(self, node):
 453         self.body.append('</abbr>')
 454
 455     def visit_acronym(self, node):
 456         # @@@ implementation incomplete ("title" attribute)
 457         self.body.append(self.starttag(node, 'abbr', ''))
 458
 459     def depart_acronym(self, node):
 460         self.body.append('</abbr>')
 461
 462     def visit_address(self, node):
 463         self.visit_docinfo_item(node, 'address', meta=False)
 464         self.body.append(self.starttag(node, 'pre', '', CLASS='address'))
 465
 466     def depart_address(self, node):
 467         self.body.append('\n</pre>\n')
 468         self.depart_docinfo_item()
 469
 470     def visit_admonition(self, node):
 471         node['classes'].insert(0, 'admonition')
 472         self.body.append(self.starttag(node, 'div'))
 473
 474     def depart_admonition(self, node=None):
 475         self.body.append('</div>\n')
 476
    # Maps the value of the `attribution` setting to the (prefix, suffix)
    # pair written around a block quote attribution.
    attribution_formats = {'dash': (u'\u2014', ''),
                           'parentheses': ('(', ')'),
                           'parens': ('(', ')'),
                           'none': ('', '')}
 481
 482     def visit_attribution(self, node):
 483         prefix, suffix = self.attribution_formats[self.settings.attribution]
 484         self.context.append(suffix)
 485         self.body.append(
 486             self.starttag(node, 'p', prefix, CLASS='attribution'))
 487         self.body.append(self.starttag(node, 'cite', ''))
 488
 489     def depart_attribution(self, node):
 490         self.body.append('</cite>' + self.context.pop() + '</p>\n')
 491
 492     # author, authors
 493     # ---------------
 494     # Use paragraphs instead of hard-coded linebreaks.
 495
 496     def visit_author(self, node):
 497         if not(isinstance(node.parent, nodes.authors)):
 498             self.visit_docinfo_item(node, 'author')
 499         self.body.append('<p>')
 500
 501     def depart_author(self, node):
 502         self.body.append('</p>')
 503         if isinstance(node.parent, nodes.authors):
 504             self.body.append('\n')
 505         else:
 506             self.depart_docinfo_item()
 507
 508     def visit_authors(self, node):
 509         self.visit_docinfo_item(node, 'authors', meta=False)
 510
 511     def depart_authors(self, node):
 512         self.depart_docinfo_item()
 513
 514     def visit_block_quote(self, node):
 515         self.body.append(self.starttag(node, 'blockquote'))
 516
 517     def depart_block_quote(self, node):
 518         self.body.append('</blockquote>\n')
 519
 520     def check_simple_list(self, node):
 521         """Check for a simple list that can be rendered compactly."""
 522         visitor = SimpleListChecker(self.document)
 523         try:
 524             node.walk(visitor)
 525         except nodes.NodeFound:
 526             return None
 527         else:
 528             return 1
 529
 530     # Compact lists
 531     # ------------
 532     # Include definition lists and field lists (in addition to ordered
 533     # and unordered lists) in the test if a list is "simple"  (cf. the
 534     # html4css1.HTMLTranslator docstring and the SimpleListChecker class at
 535     # the end of this file).
 536
 537     def is_compactable(self, node):
 538         # print "is_compactable %s ?" % node.__class__,
 539         # explicite class arguments have precedence
 540         if 'compact' in node['classes']:
 541             # print "explicitely compact"
 542             return True
 543         if 'open' in node['classes']:
 544             # print "explicitely open"
 545             return False
 546         # check config setting:
 547         if (isinstance(node, nodes.field_list) or
 548             isinstance(node, nodes.definition_list)
 549            ) and not self.settings.compact_field_lists:
 550             # print "`compact-field-lists` is False"
 551             return False
 552         if (isinstance(node, nodes.enumerated_list) or
 553             isinstance(node, nodes.bullet_list)
 554            ) and not self.settings.compact_lists:
 555             # print "`compact-lists` is False"
 556             return False
 557         # more special cases:
 558         if (self.topic_classes == ['contents']): # TODO: self.in_contents
 559             return True
 560         # check the list items:
 561         visitor = SimpleListChecker(self.document)
 562         try:
 563             node.walk(visitor)
 564         except nodes.NodeFound:
 565             # print "complex node"
 566             return False
 567         else:
 568             # print "simple list"
 569             return True
 570
 571     def visit_bullet_list(self, node):
 572         atts = {}
 573         old_compact_simple = self.compact_simple
 574         self.context.append((self.compact_simple, self.compact_p))
 575         self.compact_p = None
 576         self.compact_simple = self.is_compactable(node)
 577         if self.compact_simple and not old_compact_simple:
 578             atts['class'] = 'simple'
 579         self.body.append(self.starttag(node, 'ul', **atts))
 580
 581     def depart_bullet_list(self, node):
 582         self.compact_simple, self.compact_p = self.context.pop()
 583         self.body.append('</ul>\n')
 584
 585     def visit_caption(self, node):
 586         self.body.append(self.starttag(node, 'p', '', CLASS='caption'))
 587
 588     def depart_caption(self, node):
 589         self.body.append('</p>\n')
 590
 591     # citations
 592     # ---------
 593     # Use definition list instead of table for bibliographic references.
 594     # Join adjacent citation entries.
 595
 596     def visit_citation(self, node):
 597         if not self.in_footnote_list:
 598             self.body.append('<dl class="citation">\n')
 599             self.in_footnote_list = True
 600
 601     def depart_citation(self, node):
 602         self.body.append('</dd>\n')
 603         if not isinstance(node.next_node(descend=False, siblings=True),
 604                           nodes.citation):
 605             self.body.append('</dl>\n')
 606             self.in_footnote_list = False
 607
 608     def visit_citation_reference(self, node):
 609         href = '#'
 610         if 'refid' in node:
 611             href += node['refid']
 612         elif 'refname' in node:
 613             href += self.document.nameids[node['refname']]
 614         # else: # TODO system message (or already in the transform)?
 615         # 'Citation reference missing.'
 616         self.body.append(self.starttag(
 617             node, 'a', '[', CLASS='citation-reference', href=href))
 618
 619     def depart_citation_reference(self, node):
 620         self.body.append(']</a>')
 621
    # classifier
 623     # ----------
 624     # don't insert classifier-delimiter here (done by CSS)
 625
 626     def visit_classifier(self, node):
 627         self.body.append(self.starttag(node, 'span', '', CLASS='classifier'))
 628
 629     def depart_classifier(self, node):
 630         self.body.append('</span>')
 631
 632     def visit_colspec(self, node):
 633         self.colspecs.append(node)
 634         # "stubs" list is an attribute of the tgroup element:
 635         node.parent.stubs.append(node.attributes.get('stub'))
 636
 637     def depart_colspec(self, node):
 638         pass
 639
 640     def write_colspecs(self):
 641         width = 0
 642         for node in self.colspecs:
 643             width += node['colwidth']
 644         for node in self.colspecs:
 645             colwidth = int(node['colwidth'] * 100.0 / width + 0.5)
 646             self.body.append(self.emptytag(node, 'col',
 647                                            style='width: %i%%' % colwidth))
 648         self.colspecs = []
 649
 650     def visit_comment(self, node,
 651                       sub=re.compile('-(?=-)').sub):
 652         """Escape double-dashes in comment text."""
 653         self.body.append('<!-- %s -->\n' % sub('- ', node.astext()))
 654         # Content already processed:
 655         raise nodes.SkipNode
 656
 657     def visit_compound(self, node):
 658         self.body.append(self.starttag(node, 'div', CLASS='compound'))
 659         if len(node) > 1:
 660             node[0]['classes'].append('compound-first')
 661             node[-1]['classes'].append('compound-last')
 662             for child in node[1:-1]:
 663                 child['classes'].append('compound-middle')
 664
 665     def depart_compound(self, node):
 666         self.body.append('</div>\n')
 667
 668     def visit_container(self, node):
 669         self.body.append(self.starttag(node, 'div', CLASS='docutils container'))
 670
 671     def depart_container(self, node):
 672         self.body.append('</div>\n')
 673
 674     def visit_contact(self, node):
 675         self.visit_docinfo_item(node, 'contact', meta=False)
 676
 677     def depart_contact(self, node):
 678         self.depart_docinfo_item()
 679
 680     def visit_copyright(self, node):
 681         self.visit_docinfo_item(node, 'copyright', meta=False)
 682
 683     def depart_copyright(self, node):
 684         self.depart_docinfo_item()
 685
 686     def visit_date(self, node):
 687         self.visit_docinfo_item(node, 'date', meta=False)
 688
 689     def depart_date(self, node):
 690         self.depart_docinfo_item()
 691
 692     def visit_decoration(self, node):
 693         pass
 694
 695     def depart_decoration(self, node):
 696         pass
 697
 698     def visit_definition(self, node):
 699         self.body.append('</dt>\n')
 700         self.body.append(self.starttag(node, 'dd', ''))
 701
 702     def depart_definition(self, node):
 703         self.body.append('</dd>\n')
 704
 705     def visit_definition_list(self, node):
 706         classes = node.setdefault('classes', [])
 707         if self.is_compactable(node):
 708             classes.append('simple')
 709         self.body.append(self.starttag(node, 'dl'))
 710
 711     def depart_definition_list(self, node):
 712         self.body.append('</dl>\n')
 713
 714     def visit_definition_list_item(self, node):
 715         # pass class arguments, ids and names to definition term:
 716         node.children[0]['classes'] = (
 717             node.get('classes', []) + node.children[0].get('classes', []))
 718         node.children[0]['ids'] = (
 719             node.get('ids', []) + node.children[0].get('ids', []))
 720         node.children[0]['names'] = (
 721             node.get('names', []) + node.children[0].get('names', []))
 722
 723     def depart_definition_list_item(self, node):
 724         pass
 725
 726     def visit_description(self, node):
 727         self.body.append(self.starttag(node, 'dd', ''))
 728
 729     def depart_description(self, node):
 730         self.body.append('</dd>\n')
 731
 732
 733     # docinfo
 734     # -------
 735     # use definition list instead of table
 736
 737     def visit_docinfo(self, node):
 738         classes = 'docinfo'
 739         if (self.is_compactable(node)):
 740             classes += ' simple'
 741         self.body.append(self.starttag(node, 'dl', CLASS=classes))
 742
 743     def depart_docinfo(self, node):
 744         self.body.append('</dl>\n')
 745
 746     def visit_docinfo_item(self, node, name, meta=True):
 747         if meta:
 748             meta_tag = '<meta name="%s" content="%s" />\n' \
 749                        % (name, self.attval(node.astext()))
 750             self.add_meta(meta_tag)
 751         self.body.append('<dt class="%s">%s</dt>\n'
 752                          % (name, self.language.labels[name]))
 753         self.body.append(self.starttag(node, 'dd', '', CLASS=name))
 754
 755     def depart_docinfo_item(self):
 756         self.body.append('</dd>\n')
 757
 758     def visit_doctest_block(self, node):
 759         self.body.append(self.starttag(node, 'pre', suffix='',
 760                                        CLASS='code python doctest'))
 761
 762     def depart_doctest_block(self, node):
 763         self.body.append('\n</pre>\n')
 764
 765     def visit_document(self, node):
 766         self.head.append('<title>%s</title>\n'
 767                          % self.encode(node.get('title', '')))
 768
    def depart_document(self, node):
        """Assemble the head, prolog and body part lists after traversal.

        Fills `head_prefix`, `html_prolog`, `meta`, `head`, `stylesheet`,
        `html_head`, `body_prefix`, `body_suffix`, `fragment` and
        `html_body` from the pieces collected while visiting the document.
        """
        self.head_prefix.extend([self.doctype,
                                 self.head_prefix_template %
                                 {'lang': self.settings.language_code}])
        self.html_prolog.append(self.doctype)
        # The content-type declaration goes first in both `meta` and `head`.
        self.meta.insert(0, self.content_type % self.settings.output_encoding)
        self.head.insert(0, self.content_type % self.settings.output_encoding)
        if self.math_header:
            # MathJax header lines join `head`; for the other math output
            # formats the header lines are added to the `stylesheet` list.
            if self.math_output == 'mathjax':
                self.head.extend(self.math_header)
            else:
                self.stylesheet.extend(self.math_header)
        # skip content-type meta tag with interpolated charset value:
        self.html_head.extend(self.head[1:])
        self.body_prefix.append(self.starttag(node, 'div', CLASS='document'))
        self.body_suffix.insert(0, '</div>\n')
        self.fragment.extend(self.body) # self.fragment is the "naked" body
        # NOTE(review): the [1:] and [:-1] slices presumably drop the outer
        # wrapper tags held at the ends of body_prefix/body_suffix -- confirm
        # against the initial values set in __init__.
        self.html_body.extend(self.body_prefix[1:] + self.body_pre_docinfo
                              + self.docinfo + self.body
                              + self.body_suffix[:-1])
        # Every value pushed on `context` while visiting must be popped by now.
        assert not self.context, 'len(context) = %s' % len(self.context)
 790
 791     def visit_emphasis(self, node):
 792         self.body.append(self.starttag(node, 'em', ''))
 793
 794     def depart_emphasis(self, node):
 795         self.body.append('</em>')
 796
 797     def visit_entry(self, node):
 798         atts = {'class': []}
 799         if isinstance(node.parent.parent, nodes.thead):
 800             atts['class'].append('head')
 801         if node.parent.parent.parent.stubs[node.parent.column]:
 802             # "stubs" list is an attribute of the tgroup element
 803             atts['class'].append('stub')
 804         if atts['class']:
 805             tagname = 'th'
 806             atts['class'] = ' '.join(atts['class'])
 807         else:
 808             tagname = 'td'
 809             del atts['class']
 810         node.parent.column += 1
 811         if 'morerows' in node:
 812             atts['rowspan'] = node['morerows'] + 1
 813         if 'morecols' in node:
 814             atts['colspan'] = node['morecols'] + 1
 815             node.parent.column += node['morecols']
 816         self.body.append(self.starttag(node, tagname, '', **atts))
 817         self.context.append('</%s>\n' % tagname.lower())
 818         # TODO: why did the html4css1 writer insert an NBSP into empty cells?
 819         # if len(node) == 0:              # empty cell
 820         #     self.body.append('&#0160;') # no-break space
 821
 822     def depart_entry(self, node):
 823         self.body.append(self.context.pop())
 824
 825     def visit_enumerated_list(self, node):
 826         """
 827         The 'start' attribute does not conform to HTML 4.01's strict.dtd, but
 828         CSS1 doesn't help. CSS2 isn't widely enough supported yet to be
 829         usable.
 830         """
 831         atts = {}
 832         if 'start' in node:
 833             atts['start'] = node['start']
 834         if 'enumtype' in node:
 835             atts['class'] = node['enumtype']
 836         if self.is_compactable(node):
 837             atts['class'] = (atts.get('class', '') + ' simple').strip()
 838         self.body.append(self.starttag(node, 'ol', **atts))
 839
 840     def depart_enumerated_list(self, node):
 841         self.body.append('</ol>\n')
 842
 843     # field-list
 844     # ----------
 845     # set as definition list, styled with CSS
 846
 847     def visit_field_list(self, node):
 848         # Keep simple paragraphs in the field_body to enable CSS
 849         # rule to start body on new line if the label is too long
 850         classes = 'field-list'
 851         if (self.is_compactable(node)):
 852             classes += ' simple'
 853         self.body.append(self.starttag(node, 'dl', CLASS=classes))
 854
 855     def depart_field_list(self, node):
 856         self.body.append('</dl>\n')
 857
 858     def visit_field(self, node):
 859         pass
 860
 861     def depart_field(self, node):
 862         pass
 863
 864     # as field is ignored, pass class arguments to field-name and field-body:
 865
 866     def visit_field_name(self, node):
 867         self.body.append(self.starttag(node, 'dt', '',
 868                                        CLASS=''.join(node.parent['classes'])))
 869
 870     def depart_field_name(self, node):
 871         self.body.append('</dt>\n')
 872
 873     def visit_field_body(self, node):
 874         self.body.append(self.starttag(node, 'dd', '',
 875                                        CLASS=''.join(node.parent['classes'])))
 876
 877     def depart_field_body(self, node):
 878         self.body.append('</dd>\n')
 879
 880     def visit_figure(self, node):
 881         atts = {'class': 'figure'}
 882         if node.get('width'):
 883             atts['style'] = 'width: %s' % node['width']
 884         if node.get('align'):
 885             atts['class'] += " align-" + node['align']
 886         self.body.append(self.starttag(node, 'div', **atts))
 887
 888     def depart_figure(self, node):
 889         self.body.append('</div>\n')
 890
 891     # use HTML 5 <footer> element?
 892     def visit_footer(self, node):
 893         self.context.append(len(self.body))
 894
 895     def depart_footer(self, node):
 896         start = self.context.pop()
 897         footer = [self.starttag(node, 'div', CLASS='footer'),
 898                   '<hr class="footer" />\n']
 899         footer.extend(self.body[start:])
 900         footer.append('\n</div>\n')
 901         self.footer.extend(footer)
 902         self.body_suffix[:0] = footer
 903         del self.body[start:]
 904
 905     # footnotes
 906     # ---------
 907     # use definition list instead of table for footnote text
 908
 909     # TODO: use the new HTML5 element <aside>? (Also for footnote text)
 910     def visit_footnote(self, node):
 911         if not self.in_footnote_list:
 912             classes = 'footnote ' + self.settings.footnote_references
 913             self.body.append('<dl class="%s">\n'%classes)
 914             self.in_footnote_list = True
 915
 916     def depart_footnote(self, node):
 917         self.body.append('</dd>\n')
 918         if not isinstance(node.next_node(descend=False, siblings=True),
 919                           nodes.footnote):
 920             self.body.append('</dl>\n')
 921             self.in_footnote_list = False
 922
 923     def visit_footnote_reference(self, node):
 924         href = '#' + node['refid']
 925         classes = 'footnote-reference ' + self.settings.footnote_references
 926         self.body.append(self.starttag(node, 'a', '', #suffix,
 927                                        CLASS=classes, href=href))
 928
 929     def depart_footnote_reference(self, node):
 930         # self.body.append(self.context.pop() + '</a>')
 931         self.body.append('</a>')
 932
 933     def visit_generated(self, node):
 934         if 'sectnum' in node['classes']:
 935             # get section number (strip trailing no-break-spaces)
 936             sectnum = node.astext().rstrip(u' ')
 937             # print sectnum.encode('utf-8')
 938             self.body.append('<span class="sectnum">%s</span> '
 939                                     % self.encode(sectnum))
 940             # Content already processed:
 941             raise nodes.SkipNode
 942
 943     def depart_generated(self, node):
 944         pass
 945
 946     def visit_header(self, node):
 947         self.context.append(len(self.body))
 948
 949     def depart_header(self, node):
 950         start = self.context.pop()
 951         header = [self.starttag(node, 'div', CLASS='header')]
 952         header.extend(self.body[start:])
 953         header.append('\n<hr class="header"/>\n</div>\n')
 954         self.body_prefix.extend(header)
 955         self.header.extend(header)
 956         del self.body[start:]
 957
    # Image types to place in an <object> element
    # SVG not supported by IE up to version 8
    # (html4css1 strives for IE6 compatibility)
    # Maps the lower-cased file extension of an image URI to the MIME type
    # that visit_image() writes to the <object> element's "type" attribute.
    object_image_types = {#'.svg': 'image/svg+xml',
                         '.swf': 'application/x-shockwave-flash'}
 963
    def visit_image(self, node):
        """Write an image as an empty <img> or as an <object> element.

        URIs whose extension is listed in `object_image_types` are
        embedded with <object>; all others use <img>.  Width and height
        (optionally scaled) are emitted as an inline "style" attribute.
        """
        atts = {}
        uri = node['uri']
        ext = os.path.splitext(uri)[1].lower()
        if ext in self.object_image_types:
            atts['data'] = uri
            atts['type'] = self.object_image_types[ext]
        else:
            atts['src'] = uri
            # The URI serves as alternative text if none is given.
            atts['alt'] = node.get('alt', uri)
        # image size
        if 'width' in node:
            atts['width'] = node['width']
        if 'height' in node:
            atts['height'] = node['height']
        if 'scale' in node:
            # Read missing dimensions from the image file, but only if PIL
            # is available and file insertion is enabled in the settings.
            if (PIL and not ('width' in node and 'height' in node)
                and self.settings.file_insertion_enabled):
                imagepath = urllib.url2pathname(uri)
                try:
                    img = PIL.Image.open(
                            imagepath.encode(sys.getfilesystemencoding()))
                except (IOError, UnicodeEncodeError):
                    pass # TODO: warn?
                else:
                    # The image file was read, so record it as a dependency.
                    self.settings.record_dependencies.add(
                        imagepath.replace('\\', '/'))
                    if 'width' not in atts:
                        atts['width'] = '%dpx' % img.size[0]
                    if 'height' not in atts:
                        atts['height'] = '%dpx' % img.size[1]
                    del img
            # Apply the percentage scale to the numeric part of each known
            # dimension, keeping its unit suffix.
            for att_name in 'width', 'height':
                if att_name in atts:
                    match = re.match(r'([0-9.]+)(\S*)$', atts[att_name])
                    # NOTE(review): a value not starting with digits or "."
                    # fails this assertion -- confirm that the parser
                    # guarantees such values never reach the writer.
                    assert match
                    atts[att_name] = '%s%s' % (
                        float(match.group(1)) * (float(node['scale']) / 100),
                        match.group(2))
        # Move the dimensions from attributes into CSS declarations.
        style = []
        for att_name in 'width', 'height':
            if att_name in atts:
                if re.match(r'^[0-9.]+$', atts[att_name]):
                    # Interpret unitless values as pixels.
                    atts[att_name] += 'px'
                style.append('%s: %s;' % (att_name, atts[att_name]))
                del atts[att_name]
        if style:
            atts['style'] = ' '.join(style)
        if (isinstance(node.parent, nodes.TextElement) or
            (isinstance(node.parent, nodes.reference) and
             not isinstance(node.parent.parent, nodes.TextElement))):
            # Inline context or surrounded by <a>...</a>.
            suffix = ''
        else:
            suffix = '\n'
        if 'align' in node:
            atts['class'] = 'align-%s' % node['align']
        if ext in self.object_image_types:
            # do NOT use an empty tag: incorrect rendering in browsers
            self.body.append(self.starttag(node, 'object', suffix, **atts) +
                             node.get('alt', uri) + '</object>' + suffix)
        else:
            self.body.append(self.emptytag(node, 'img', suffix, **atts))
1028
1029     def depart_image(self, node):
1030         # self.body.append(self.context.pop())
1031         pass
1032
1033     def visit_inline(self, node):
1034         self.body.append(self.starttag(node, 'span', ''))
1035
1036     def depart_inline(self, node):
1037         self.body.append('</span>')
1038
1039     # footnote and citation labels:
1040     def visit_label(self, node):
1041         if (isinstance(node.parent, nodes.footnote)):
1042             classes = self.settings.footnote_references
1043         else:
1044             classes = 'brackets'
1045         # pass parent node to get id into starttag:
1046         self.body.append(self.starttag(node.parent, 'dt', '', CLASS='label'))
1047         self.body.append(self.starttag(node, 'span', '', CLASS=classes))
1048         # footnote/citation backrefs:
1049         if self.settings.footnote_backlinks:
1050             backrefs = node.parent['backrefs']
1051             if len(backrefs) == 1:
1052                 self.body.append('<a class="fn-backref" href="#%s">'
1053                                  % backrefs[0])
1054
1055     def depart_label(self, node):
1056         self.body.append('</span>')
1057         if self.settings.footnote_backlinks:
1058             backrefs = node.parent['backrefs']
1059             if len(backrefs) == 1:
1060                 self.body.append('</a>')
1061             elif len(backrefs) > 1:
1062                 # Python 2.4 fails with enumerate(backrefs, 1)
1063                 backlinks = ['<a href="#%s">%s</a>' % (ref, i+1)
1064                              for (i, ref) in enumerate(backrefs)]
1065                 self.body.append('<span class="fn-backref">(%s)</span>'
1066                                  % ','.join(backlinks))
1067         self.body.append('</dt>\n<dd>')
1068
1069     def visit_legend(self, node):
1070         self.body.append(self.starttag(node, 'div', CLASS='legend'))
1071
1072     def depart_legend(self, node):
1073         self.body.append('</div>\n')
1074
1075     def visit_line(self, node):
1076         self.body.append(self.starttag(node, 'div', suffix='', CLASS='line'))
1077         if not len(node):
1078             self.body.append('<br />')
1079
1080     def depart_line(self, node):
1081         self.body.append('</div>\n')
1082
1083     def visit_line_block(self, node):
1084         self.body.append(self.starttag(node, 'div', CLASS='line-block'))
1085
1086     def depart_line_block(self, node):
1087         self.body.append('</div>\n')
1088
1089     def visit_list_item(self, node):
1090         self.body.append(self.starttag(node, 'li', ''))
1091
1092     def depart_list_item(self, node):
1093         self.body.append('</li>\n')
1094
1095     # inline literal
1096     def visit_literal(self, node):
1097         # special case: "code" role
1098         classes = node.get('classes', [])
1099         if 'code' in classes:
1100             # filter 'code' from class arguments
1101             node['classes'] = [cls for cls in classes if cls != 'code']
1102             self.body.append(self.starttag(node, 'code', ''))
1103             return
1104         self.body.append(
1105             self.starttag(node, 'span', '', CLASS='docutils literal'))
1106         text = node.astext()
1107         # remove hard line breaks (except if in a parsed-literal block)
1108         if not isinstance(node.parent, nodes.literal_block):
1109             text = text.replace('\n', ' ')
1110         # Protect text like ``--an-option`` and the regular expression
1111         # ``[+]?(\d+(\.\d*)?|\.\d+)`` from bad line wrapping
1112         for token in self.words_and_spaces.findall(text):
1113             if token.strip() and self.sollbruchstelle.search(token):
1114                 self.body.append('<span class="pre">%s</span>'
1115                                     % self.encode(token))
1116             else:
1117                 self.body.append(self.encode(token))
1118         self.body.append('</span>')
1119         # Content already processed:
1120         raise nodes.SkipNode
1121
1122     def depart_literal(self, node):
1123         # skipped unless literal element is from "code" role:
1124         self.body.append('</code>')
1125
1126     def visit_literal_block(self, node):
1127         self.body.append(self.starttag(node, 'pre', '', CLASS='literal-block'))
1128         if 'code' in node.get('classes', []):
1129             self.body.append('<code>')
1130
1131     def depart_literal_block(self, node):
1132         if 'code' in node.get('classes', []):
1133             self.body.append('</code>')
1134         self.body.append('</pre>\n')
1135
1136     # Mathematics:
1137     # As there is no native HTML math support, we provide alternatives
1138     # for the math-output: LaTeX and MathJax simply wrap the content,
1139     # HTML and MathML also convert the math_code.
1140     # HTML container
    # Element names and class argument used by visit_math() for each
    # supported math output format; an empty element name means the math
    # code is written without a wrapping element.
    math_tags = {# math_output: (block, inline, class-arguments)
                 'mathml':      ('div', '', ''),
                 'html':        ('div', 'span', 'formula'),
                 'mathjax':     ('div', 'span', 'math'),
                 'latex':       ('pre', 'tt',   'math'),
                }
1147
1148     def visit_math(self, node, math_env=''):
1149         # If the method is called from visit_math_block(), math_env != ''.
1150
1151         if self.math_output not in self.math_tags:
1152             self.document.reporter.error(
1153                 'math-output format "%s" not supported '
1154                 'falling back to "latex"'% self.math_output)
1155             self.math_output = 'latex'
1156         tag = self.math_tags[self.math_output][math_env == '']
1157         clsarg = self.math_tags[self.math_output][2]
1158         # LaTeX container
1159         wrappers = {# math_mode: (inline, block)
1160                     'mathml':  ('$%s$',   u'\\begin{%s}\n%s\n\\end{%s}'),
1161                     'html':    ('$%s$',   u'\\begin{%s}\n%s\n\\end{%s}'),
1162                     'mathjax': ('\(%s\)', u'\\begin{%s}\n%s\n\\end{%s}'),
1163                     'latex':   (None,     None),
1164                    }
1165         wrapper = wrappers[self.math_output][math_env != '']
1166         if self.math_output == 'mathml' and (not self.math_output_options or
1167                                 self.math_output_options[0] == 'blahtexml'):
1168             wrapper = None
1169         # get and wrap content
1170         math_code = node.astext().translate(unichar2tex.uni2tex_table)
1171         if wrapper:
1172             try: # wrapper with three "%s"
1173                 math_code = wrapper % (math_env, math_code, math_env)
1174             except TypeError: # wrapper with one "%s"
1175                 math_code = wrapper % math_code
1176         # settings and conversion
1177         if self.math_output in ('latex', 'mathjax'):
1178             math_code = self.encode(math_code)
1179         if self.math_output == 'mathjax' and not self.math_header:
1180             if self.math_output_options:
1181                 self.mathjax_url = self.math_output_options[0]
1182             self.math_header = [self.mathjax_script % self.mathjax_url]
1183         elif self.math_output == 'html':
1184             if self.math_output_options and not self.math_header:
1185                 self.math_header = [self.stylesheet_call(
1186                     utils.find_file_in_dirs(s, self.settings.stylesheet_dirs))
1187                     for s in self.math_output_options[0].split(',')]
1188             # TODO: fix display mode in matrices and fractions
1189             math2html.DocumentParameters.displaymode = (math_env != '')
1190             math_code = math2html.math2html(math_code)
1191         elif self.math_output == 'mathml':
1192             self.doctype = self.doctype_mathml
1193             # self.content_type = self.content_type_mathml
1194             converter = ' '.join(self.math_output_options).lower()
1195             try:
1196                 if converter == 'latexml':
1197                     math_code = tex2mathml_extern.latexml(math_code,
1198                                                     self.document.reporter)
1199                 elif converter == 'ttm':
1200                     math_code = tex2mathml_extern.ttm(math_code,
1201                                                     self.document.reporter)
1202                 elif converter == 'blahtexml':
1203                     math_code = tex2mathml_extern.blahtexml(math_code,
1204                         inline=not(math_env),
1205                         reporter=self.document.reporter)
1206                 elif not converter:
1207                     math_code = latex2mathml.tex2mathml(math_code,
1208                                                         inline=not(math_env))
1209                 else:
1210                     self.document.reporter.error('option "%s" not supported '
1211                     'with math-output "MathML"')
1212             except OSError:
1213                     raise OSError('is "latexmlmath" in your PATH?')
1214             except SyntaxError, err:
1215                 err_node = self.document.reporter.error(err, base_node=node)
1216                 self.visit_system_message(err_node)
1217                 self.body.append(self.starttag(node, 'p'))
1218                 self.body.append(u','.join(err.args))
1219                 self.body.append('</p>\n')
1220                 self.body.append(self.starttag(node, 'pre',
1221                                                CLASS='literal-block'))
1222                 self.body.append(self.encode(math_code))
1223                 self.body.append('\n</pre>\n')
1224                 self.depart_system_message(err_node)
1225                 raise nodes.SkipNode
1226         # append to document body
1227         if tag:
1228             self.body.append(self.starttag(node, tag,
1229                                            suffix='\n'*bool(math_env),
1230                                            CLASS=clsarg))
1231         self.body.append(math_code)
1232         if math_env: # block mode (equation, display)
1233             self.body.append('\n')
1234         if tag:
1235             self.body.append('</%s>' % tag)
1236         if math_env:
1237             self.body.append('\n')
1238         # Content already processed:
1239         raise nodes.SkipNode
1240
1241     def depart_math(self, node):
1242         pass # never reached
1243
1244     def visit_math_block(self, node):
1245         # print node.astext().encode('utf8')
1246         math_env = pick_math_environment(node.astext())
1247         self.visit_math(node, math_env=math_env)
1248
1249     def depart_math_block(self, node):
1250         pass # never reached
1251
1252     # Meta tags: 'lang' attribute replaced by 'xml:lang' in XHTML 1.1
    # HTML5/polyglot recommends using both
1254     def visit_meta(self, node):
1255         if node.hasattr('lang'):
1256             node['xml:lang'] = node['lang']
1257             # del(node['lang'])
1258         meta = self.emptytag(node, 'meta', **node.non_default_attributes())
1259         self.add_meta(meta)
1260
1261     def depart_meta(self, node):
1262         pass
1263
1264     def add_meta(self, tag):
1265         self.meta.append(tag)
1266         self.head.append(tag)
1267
1268     def visit_option(self, node):
1269         self.body.append(self.starttag(node, 'span', '', CLASS='option'))
1270
1271     def depart_option(self, node):
1272         self.body.append('</span>')
1273         if isinstance(node.next_node(descend=False, siblings=True),
1274                       nodes.option):
1275             self.body.append(', ')
1276
1277     def visit_option_argument(self, node):
1278         self.body.append(node.get('delimiter', ' '))
1279         self.body.append(self.starttag(node, 'var', ''))
1280
1281     def depart_option_argument(self, node):
1282         self.body.append('</var>')
1283
1284     def visit_option_group(self, node):
1285         self.body.append(self.starttag(node, 'dt', ''))
1286         self.body.append('<kbd>')
1287
1288     def depart_option_group(self, node):
1289         self.body.append('</kbd></dt>\n')
1290
1291     def visit_option_list(self, node):
1292         self.body.append(
1293             self.starttag(node, 'dl', CLASS='option-list'))
1294
1295     def depart_option_list(self, node):
1296         self.body.append('</dl>\n')
1297
1298     def visit_option_list_item(self, node):
1299         pass
1300
1301     def depart_option_list_item(self, node):
1302         pass
1303
1304     def visit_option_string(self, node):
1305         pass
1306
1307     def depart_option_string(self, node):
1308         pass
1309
1310     def visit_organization(self, node):
1311         self.visit_docinfo_item(node, 'organization', meta=False)
1312
1313     def depart_organization(self, node):
1314         self.depart_docinfo_item()
1315
1316     # Do not omit <p> tags
1317     # --------------------
1318     #
1319     # The HTML4CSS1 writer does this to "produce
1320     # visually compact lists (less vertical whitespace)". This writer
    # relies on CSS rules for "visual compactness".
1322     #
1323     # * In XHTML 1.1, e.g. a <blockquote> element may not contain
1324     #   character data, so you cannot drop the <p> tags.
1325     # * Keeping simple paragraphs in the field_body enables a CSS
1326     #   rule to start the field-body on a new line if the label is too long
1327     # * it makes the code simpler.
1328     #
1329     # TODO: omit paragraph tags in simple table cells?
1330
1331     def visit_paragraph(self, node):
1332         self.body.append(self.starttag(node, 'p', ''))
1333
1334     def depart_paragraph(self, node):
1335         self.body.append('</p>')
1336         if not (isinstance(node.parent, (nodes.list_item, nodes.entry)) and
1337                 (len(node.parent) == 1)):
1338             self.body.append('\n')
1339
1340     def visit_problematic(self, node):
1341         if node.hasattr('refid'):
1342             self.body.append('<a href="#%s">' % node['refid'])
1343             self.context.append('</a>')
1344         else:
1345             self.context.append('')
1346         self.body.append(self.starttag(node, 'span', '', CLASS='problematic'))
1347
1348     def depart_problematic(self, node):
1349         self.body.append('</span>')
1350         self.body.append(self.context.pop())
1351
1352     def visit_raw(self, node):
1353         if 'html' in node.get('format', '').split():
1354             t = isinstance(node.parent, nodes.TextElement) and 'span' or 'div'
1355             if node['classes']:
1356                 self.body.append(self.starttag(node, t, suffix=''))
1357             self.body.append(node.astext())
1358             if node['classes']:
1359                 self.body.append('</%s>' % t)
1360         # Keep non-HTML raw text out of output:
1361         raise nodes.SkipNode
1362
1363     def visit_reference(self, node):
1364         atts = {'class': 'reference'}
1365         if 'refuri' in node:
1366             atts['href'] = node['refuri']
1367             if ( self.settings.cloak_email_addresses
1368                  and atts['href'].startswith('mailto:')):
1369                 atts['href'] = self.cloak_mailto(atts['href'])
1370                 self.in_mailto = True
1371             atts['class'] += ' external'
1372         else:
1373             assert 'refid' in node, \
1374                    'References must have "refuri" or "refid" attribute.'
1375             atts['href'] = '#' + node['refid']
1376             atts['class'] += ' internal'
1377         if not isinstance(node.parent, nodes.TextElement):
1378             assert len(node) == 1 and isinstance(node[0], nodes.image)
1379             atts['class'] += ' image-reference'
1380         self.body.append(self.starttag(node, 'a', '', **atts))
1381
1382     def depart_reference(self, node):
1383         self.body.append('</a>')
1384         if not isinstance(node.parent, nodes.TextElement):
1385             self.body.append('\n')
1386         self.in_mailto = False
1387
1388     def visit_revision(self, node):
1389         self.visit_docinfo_item(node, 'revision', meta=False)
1390
1391     def depart_revision(self, node):
1392         self.depart_docinfo_item()
1393
1394     def visit_row(self, node):
1395         self.body.append(self.starttag(node, 'tr', ''))
1396         node.column = 0
1397
1398     def depart_row(self, node):
1399         self.body.append('</tr>\n')
1400
1401     def visit_rubric(self, node):
1402         self.body.append(self.starttag(node, 'p', '', CLASS='rubric'))
1403
1404     def depart_rubric(self, node):
1405         self.body.append('</p>\n')
1406
1407     # TODO: use the new HTML 5 element <section>?
1408     def visit_section(self, node):
1409         self.section_level += 1
1410         self.body.append(
1411             self.starttag(node, 'div', CLASS='section'))
1412
1413     def depart_section(self, node):
1414         self.section_level -= 1
1415         self.body.append('</div>\n')
1416
1417     # TODO: use the new HTML5 element <aside>? (Also for footnote text)
1418     def visit_sidebar(self, node):
1419         self.body.append(
1420             self.starttag(node, 'div', CLASS='sidebar'))
1421         self.in_sidebar = True
1422
1423     def depart_sidebar(self, node):
1424         self.body.append('</div>\n')
1425         self.in_sidebar = False
1426
1427     def visit_status(self, node):
1428         self.visit_docinfo_item(node, 'status', meta=False)
1429
1430     def depart_status(self, node):
1431         self.depart_docinfo_item()
1432
1433     def visit_strong(self, node):
1434         self.body.append(self.starttag(node, 'strong', ''))
1435
1436     def depart_strong(self, node):
1437         self.body.append('</strong>')
1438
1439     def visit_subscript(self, node):
1440         self.body.append(self.starttag(node, 'sub', ''))
1441
1442     def depart_subscript(self, node):
1443         self.body.append('</sub>')
1444
    def visit_substitution_definition(self, node):
        """Internal only: skip the node, nothing is written to the output."""
        raise nodes.SkipNode
1448
1449     def visit_substitution_reference(self, node):
1450         self.unimplemented_visit(node)
1451
1452     # h1–h6 elements must not be used to markup subheadings, subtitles,
1453     # alternative titles and taglines unless intended to be the heading for a
1454     # new section or subsection.
1455     # -- http://www.w3.org/TR/html/sections.html#headings-and-sections
1456
1457     def visit_subtitle(self, node):
1458         if isinstance(node.parent, nodes.sidebar):
1459             classes = 'sidebar-subtitle'
1460         elif isinstance(node.parent, nodes.document):
1461             classes = 'subtitle'
1462             self.in_document_title = len(self.body)
1463         elif isinstance(node.parent, nodes.section):
1464             classes = 'section-subtitle'
1465         self.body.append(self.starttag(node, 'p', '', CLASS=classes))
1466
1467     def depart_subtitle(self, node):
1468         self.body.append('</p>\n')
1469         if self.in_document_title:
1470             self.subtitle = self.body[self.in_document_title:-1]
1471             self.in_document_title = 0
1472             self.body_pre_docinfo.extend(self.body)
1473             self.html_subtitle.extend(self.body)
1474             del self.body[:]
1475
1476     def visit_superscript(self, node):
1477         self.body.append(self.starttag(node, 'sup', ''))
1478
1479     def depart_superscript(self, node):
1480         self.body.append('</sup>')
1481
1482     def visit_system_message(self, node):
1483         self.body.append(self.starttag(node, 'div', CLASS='system-message'))
1484         self.body.append('<p class="system-message-title">')
1485         backref_text = ''
1486         if len(node['backrefs']):
1487             backrefs = node['backrefs']
1488             if len(backrefs) == 1:
1489                 backref_text = ('; <em><a href="#%s">backlink</a></em>'
1490                                 % backrefs[0])
1491             else:
1492                 i = 1
1493                 backlinks = []
1494                 for backref in backrefs:
1495                     backlinks.append('<a href="#%s">%s</a>' % (backref, i))
1496                     i += 1
1497                 backref_text = ('; <em>backlinks: %s</em>'
1498                                 % ', '.join(backlinks))
1499         if node.hasattr('line'):
1500             line = ', line %s' % node['line']
1501         else:
1502             line = ''
1503         self.body.append('System Message: %s/%s '
1504                          '(<span class="docutils literal">%s</span>%s)%s</p>\n'
1505                          % (node['type'], node['level'],
1506                             self.encode(node['source']), line, backref_text))
1507
1508     def depart_system_message(self, node):
1509         self.body.append('</div>\n')
1510
1511     # tables
1512     # ------
1513     # no hard-coded border setting in the table head::
1514
1515     def visit_table(self, node):
1516         classes = [cls.strip(u' \t\n')
1517                    for cls in self.settings.table_style.split(',')]
1518         tag = self.starttag(node, 'table', CLASS=' '.join(classes))
1519         self.body.append(tag)
1520
1521     def depart_table(self, node):
1522         self.body.append('</table>\n')
1523
1524     def visit_target(self, node):
1525         if not ('refuri' in node or 'refid' in node
1526                 or 'refname' in node):
1527             self.body.append(self.starttag(node, 'span', '', CLASS='target'))
1528             self.context.append('</span>')
1529         else:
1530             self.context.append('')
1531
1532     def depart_target(self, node):
1533         self.body.append(self.context.pop())
1534
1535     # no hard-coded vertical alignment in table body::
1536
1537     def visit_tbody(self, node):
1538         self.write_colspecs()
1539         self.body.append(self.context.pop()) # '</colgroup>\n' or ''
1540         self.body.append(self.starttag(node, 'tbody'))
1541
1542     def depart_tbody(self, node):
1543         self.body.append('</tbody>\n')
1544
1545     def visit_term(self, node):
1546         self.body.append(self.starttag(node, 'dt', ''))
1547
1548     def depart_term(self, node):
1549         """
1550         Leave the end tag to `self.visit_definition()`, in case there's a
1551         classifier.
1552         """
1553         pass
1554
1555     def visit_tgroup(self, node):
1556         # Mozilla needs <colgroup>:
1557         self.body.append(self.starttag(node, 'colgroup'))
1558         # Appended by thead or tbody:
1559         self.context.append('</colgroup>\n')
1560         node.stubs = []
1561
1562     def depart_tgroup(self, node):
1563         pass
1564
1565     def visit_thead(self, node):
1566         self.write_colspecs()
1567         self.body.append(self.context.pop()) # '</colgroup>\n'
1568         # There may or may not be a <thead>; this is for <tbody> to use:
1569         self.context.append('')
1570         self.body.append(self.starttag(node, 'thead'))
1571
1572     def depart_thead(self, node):
1573         self.body.append('</thead>\n')
1574
1575     def visit_title(self, node):
1576         """Only 6 section levels are supported by HTML."""
1577         check_id = 0  # TODO: is this a bool (False) or a counter?
1578         close_tag = '</p>\n'
1579         if isinstance(node.parent, nodes.topic):
1580             self.body.append(
1581                   self.starttag(node, 'p', '', CLASS='topic-title first'))
1582         elif isinstance(node.parent, nodes.sidebar):
1583             self.body.append(
1584                   self.starttag(node, 'p', '', CLASS='sidebar-title'))
1585         elif isinstance(node.parent, nodes.Admonition):
1586             self.body.append(
1587                   self.starttag(node, 'p', '', CLASS='admonition-title'))
1588         elif isinstance(node.parent, nodes.table):
1589             self.body.append(
1590                   self.starttag(node, 'caption', ''))
1591             close_tag = '</caption>\n'
1592         elif isinstance(node.parent, nodes.document):
1593             self.body.append(self.starttag(node, 'h1', '', CLASS='title'))
1594             close_tag = '</h1>\n'
1595             self.in_document_title = len(self.body)
1596         else:
1597             assert isinstance(node.parent, nodes.section)
1598             h_level = self.section_level + self.initial_header_level - 1
1599             atts = {}
1600             if (len(node.parent) >= 2 and
1601                 isinstance(node.parent[1], nodes.subtitle)):
1602                 atts['CLASS'] = 'with-subtitle'
1603             self.body.append(
1604                   self.starttag(node, 'h%s' % h_level, '', **atts))
1605             atts = {}
1606             if node.hasattr('refid'):
1607                 atts['class'] = 'toc-backref'
1608                 atts['href'] = '#' + node['refid']
1609             if atts:
1610                 self.body.append(self.starttag({}, 'a', '', **atts))
1611                 close_tag = '</a></h%s>\n' % (h_level)
1612             else:
1613                 close_tag = '</h%s>\n' % (h_level)
1614         self.context.append(close_tag)
1615
1616     def depart_title(self, node):
1617         self.body.append(self.context.pop())
1618         if self.in_document_title:
1619             self.title = self.body[self.in_document_title:-1]
1620             self.in_document_title = 0
1621             self.body_pre_docinfo.extend(self.body)
1622             self.html_title.extend(self.body)
1623             del self.body[:]
1624
1625     def visit_title_reference(self, node):
1626         self.body.append(self.starttag(node, 'cite', ''))
1627
1628     def depart_title_reference(self, node):
1629         self.body.append('</cite>')
1630
1631     # TODO: use the new HTML5 element <aside>? (Also for footnote text)
1632     def visit_topic(self, node):
1633         self.body.append(self.starttag(node, 'div', CLASS='topic'))
1634         self.topic_classes = node['classes']
1635         # TODO: replace with ::
1636         #   self.in_contents = 'contents' in node['classes']
1637
1638     def depart_topic(self, node):
1639         self.body.append('</div>\n')
1640         self.topic_classes = []
1641         # TODO self.in_contents = False
1642
1643     def visit_transition(self, node):
1644         self.body.append(self.emptytag(node, 'hr', CLASS='docutils'))
1645
1646     def depart_transition(self, node):
1647         pass
1648
1649     def visit_version(self, node):
1650         self.visit_docinfo_item(node, 'version', meta=False)
1651
1652     def depart_version(self, node):
1653         self.depart_docinfo_item()
1654
1655     def unimplemented_visit(self, node):
1656         raise NotImplementedError('visiting unimplemented node type: %s'
1657                                   % node.__class__.__name__)
1658
1659
class SimpleListChecker(nodes.GenericNodeVisitor):

    """
    Raise `nodes.NodeFound` if a non-simple list item is encountered.

    Here "simple" means a list item containing nothing other than a single
    paragraph, a simple list, or a paragraph followed by a simple list.

    This version also checks for simple field lists and docinfo.
    """

    def default_visit(self, node):
        """Treat every node type without a handler below as non-simple."""
        raise nodes.NodeFound

    def visit_list_item(self, node):
        """Raise `nodes.NodeFound` unless `node` is a simple item.

        Invisible children are not counted.  A bullet, enumerated or
        field list in last position is not counted either if the first
        visible child is a paragraph.  The item is simple if at most one
        child remains.
        """
        visible = [child for child in node.children
                   if not isinstance(child, nodes.Invisible)]
        nested_lists = (nodes.bullet_list, nodes.enumerated_list,
                        nodes.field_list)
        if (visible and isinstance(visible[0], nodes.paragraph)
            and isinstance(visible[-1], nested_lists)):
            # paragraph followed by a list: the list is checked on its own
            del visible[-1]
        if len(visible) > 1:
            raise nodes.NodeFound

    def pass_node(self, node):
        """Accept the node itself."""
        pass

    def ignore_node(self, node):
        """Skip a node that is never complex (contains only inline nodes)."""
        raise nodes.SkipNode

    # Paragraphs and text
    visit_Text = ignore_node
    visit_paragraph = ignore_node

    # Lists
    visit_bullet_list = pass_node
    visit_enumerated_list = pass_node
    visit_docinfo = pass_node

    # Docinfo nodes:
    visit_author = ignore_node
    visit_authors = visit_list_item
    visit_address = visit_list_item
    visit_contact = pass_node
    visit_copyright = ignore_node
    visit_date = ignore_node
    visit_organization = ignore_node
    visit_status = ignore_node
    visit_version = visit_list_item

    # Definition list:
    visit_definition_list = pass_node
    visit_definition_list_item = pass_node
    visit_term = ignore_node
    visit_classifier = pass_node
    visit_definition = visit_list_item

    # Field list:
    visit_field_list = pass_node
    visit_field = pass_node
    # the field body corresponds to a list item
    visit_field_body = visit_list_item
    visit_field_name = ignore_node

    # Invisible nodes should be ignored.
    visit_comment = ignore_node
    visit_substitution_definition = ignore_node
    visit_target = ignore_node
    visit_pending = ignore_node