docutils/writers/xhtml11/__init__.py

   1 # .. coding: utf8
   2 # :Author: Günter Milde <milde@users.berlios.de>
   3 # :Revision: $Revision$
   4 # :Date: $Date: 2005-06-28$
   5 # :Copyright: © 2005, 2009 Günter Milde.
   6 # :License: Released under the terms of the `2-Clause BSD license`_, in short:
   7 #
   8 #    Copying and distribution of this file, with or without modification,
   9 #    are permitted in any medium without royalty provided the copyright
  10 #    notice and this notice are preserved.
  11 #    This file is offered as-is, without any warranty.
  12 #
  13 # .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause
  14
  15 """
  16 Strict HyperText Markup Language document tree Writer.
  17
  18 This is a variant of Docutils' standard 'html4css1' writer.
  19
  20 GOAL:
  21  * The output conforms to the XHTML version 1.1 DTD.
  22  * It contains no hard-coded formatting information that would prevent
  23    layout design by cascading style sheets.
  24 """
  25
  26 __docformat__ = 'reStructuredText'
  27
  28 import os
  29 import os.path
  30 import re
  31
  32 import docutils
  33 from docutils import frontend, nodes, utils, writers, languages
  34 from docutils.writers import html4css1
  35
  36 class Writer(html4css1.Writer):
  37
  38     supported = ('html', 'xhtml', 'xhtml1',
  39                  'html4strict', 'xhtml1strict',
  40                  'xhtml11', 'xhtml1css2')
  41     """Formats this writer supports."""
  42
  43     default_stylesheets = ['html4css1.css', 'xhtml11.css']
  44     default_stylesheet_dirs = ['.',
  45         os.path.abspath(os.path.dirname(__file__)),
  46         os.path.abspath(os.path.join(
  47             os.path.dirname(os.path.dirname(__file__)), 'html4css1'))
  48                               ]
  49
  50     config_section = 'xhtml11 writer'
  51     config_section_dependencies = ('writers', 'html4css1 writer')
  52
  53     settings_spec = frontend.filter_settings_spec(
  54         html4css1.Writer.settings_spec,
  55         'field_name_limit', 'option_limit', # removed options
  56         stylesheet_path = (
  57           'Comma separated list of stylesheet paths. '
  58           'Relative paths are expanded if a matching file is found in '
  59           'the --stylesheet-dirs. With --link-stylesheet, '
  60           'the path is rewritten relative to the output HTML file. '
  61           'Default: "%s"' % ','.join(default_stylesheets),
  62           ['--stylesheet-path'],
  63           {'metavar': '<file[,file,...]>', 'overrides': 'stylesheet',
  64            'validator': frontend.validate_comma_separated_list,
  65            'default': default_stylesheets}),
  66         stylesheet_dirs = (
  67           'Comma-separated list of directories where stylesheets are found. '
  68           'Used by --stylesheet-path when expanding relative path arguments. '
  69           'Default: "%s"' % default_stylesheet_dirs,
  70           ['--stylesheet-dirs'],
  71           {'metavar': '<dir[,dir,...]>',
  72            'validator': frontend.validate_comma_separated_list,
  73            'default': default_stylesheet_dirs}),
  74         math_output = ('Math output format, one of "MathML", "HTML", '
  75             '"MathJax" or "LaTeX". Default: "MathML"',
  76             ['--math-output'],
  77             {'default': 'MathML'}))
  78
  79     def __init__(self):
  80         writers.Writer.__init__(self)
  81         self.translator_class = HTMLTranslator
  82
  83
  84 class HTMLTranslator(html4css1.HTMLTranslator):
  85     """
  86     This writer generates XHTML 1.1
  87     without formatting that interferes with a CSS stylesheet.
  88     """
  89     doctype = ('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" '
  90                '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n')
  91     doctype_mathml = (
  92         '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN" '
  93         '"http://www.w3.org/Math/DTD/mathml2/xhtml-math11-f.dtd">\n')
  94
  95     # there is no attribute "lang" in XHTML 1.1
  96     head_prefix_template = ('<html xmlns="http://www.w3.org/1999/xhtml"'
  97                             ' xml:lang="%(lang)s">\n<head>\n')
  98     lang_attribute = 'xml:lang' # changed from 'lang' in XHTML 1.0
  99
 100
 101     # Do not  mark the first child with 'class="first"' and the last
 102     # child with 'class="last"' in definitions, table cells, field
 103     # bodies, option descriptions, and list items. Use the
 104     # ``:first-child`` and ``:last-child`` selectors instad.
 105
 106     def set_first_last(self, node):
 107         pass
 108
 109     # Compact lists
 110     # ------------
 111     # Include field lists (in addition to ordered and unordered lists)
 112     # in the test if a list is "simple"  (cf. the html4css1.HTMLTranslator
 113     # docstring and the SimpleListChecker class at the end of this file).
 114
 115     def is_compactable(self, node):
 116         # print "is_compactable %s ?" % node.__class__,
 117         # explicite class arguments have precedence
 118         if 'compact' in node['classes']:
 119             # print "explicitely compact"
 120             return True
 121         if 'open' in node['classes']:
 122             # print "explicitely open"
 123             return False
 124         # check config setting:
 125         if (isinstance(node, nodes.field_list) and
 126             not self.settings.compact_field_lists):
 127             # print "`compact-field-lists` is False"
 128             return False
 129         if (isinstance(node, nodes.enumerated_list) or
 130             isinstance(node, nodes.bullet_list)
 131            ) and not self.settings.compact_lists:
 132             # print "`compact-lists` is False"
 133             return False
 134         # more special cases:
 135         if (self.compact_simple or self.topic_classes == ['contents']):
 136             # print "self.compact_simple is True"
 137             return True
 138         # check the list items:
 139         visitor = SimpleListChecker(self.document)
 140         try:
 141             node.walk(visitor)
 142         except nodes.NodeFound:
 143             # print "complex node"
 144             return False
 145         else:
 146             # print "simple list"
 147             return True
 148
 149     # citations
 150     # ---------
 151     # Use definition list instead of table for bibliographic references.
 152     # Join adjacent citation entries.
 153
 154     def visit_citation(self, node):
 155         if self.body[-1] == '<-- next citation -->':
 156             del(self.body[-1])
 157         else:
 158             self.body.append('<dl class="citation">')
 159         self.context.append(self.starttag(node, 'dd'))
 160         self.footnote_backrefs(node)
 161
 162     def depart_citation(self, node):
 163         self.body.append('</dd>\n')
 164         if isinstance(node.next_node(), nodes.citation):
 165             self.body.append('<-- next citation -->')
 166         else:
 167             self.body.append('</dl>\n')
 168
 169     # docinfo
 170     # -------
 171     # use definition list instead of table
 172
 173     def visit_docinfo(self, node):
 174         classes = 'docinfo'
 175         if (self.is_compactable(node)):
 176             classes += ' simple'
 177         self.body.append(self.starttag(node, 'dl', CLASS=classes))
 178
 179     def depart_docinfo(self, node):
 180         self.body.append('</dl>\n')
 181
 182     def visit_docinfo_item(self, node, name, meta=True):
 183         if meta:
 184             meta_tag = '<meta name="%s" content="%s" />\n' \
 185                        % (name, self.attval(node.astext()))
 186             self.add_meta(meta_tag)
 187         self.body.append('<dt class="%s">%s</dt>\n'
 188                          % (name, self.language.labels[name]))
 189         self.body.append(self.starttag(node, 'dd', '', CLASS=name))
 190
 191     def depart_docinfo_item(self):
 192         self.body.append('</dd>\n')
 193
 194
 195     # enumerated lists
 196     # ----------------
 197     # The 'start' attribute does not conform to HTML4/XHTML1 Strict
 198     # (it will resurface in HTML5)
 199
 200     def visit_enumerated_list(self, node):
 201         atts = {}
 202         if 'start' in node:
 203             atts['style'] = 'counter-reset: item %d;' % (
 204                                                 node['start'] - 1)
 205         classes = node.setdefault('classes', [])
 206         if 'enumtype' in node:
 207             classes.append(node['enumtype'])
 208         if self.is_compactable(node) and not self.compact_simple:
 209             classes.append('simple')
 210         # @@@ To do: prefix, suffix. (?)
 211         self.context.append((self.compact_simple, self.compact_p))
 212         self.compact_p = False
 213         self.body.append(self.starttag(node, 'ol', **atts))
 214
 215     # field-list
 216     # ----------
 217     # set as definition list, styled with CSS
 218
 219     def visit_field_list(self, node):
 220         # Keep simple paragraphs in the field_body to enable CSS
 221         # rule to start body on new line if the label is too long
 222         self.context.append((self.compact_field_list, self.compact_p))
 223         self.compact_field_list, self.compact_p = False, False
 224         #
 225         classes = 'field-list'
 226         if (self.is_compactable(node)):
 227             classes += ' simple'
 228         self.body.append(self.starttag(node, 'dl', CLASS=classes))
 229
 230     def depart_field_list(self, node):
 231         self.compact_field_list, self.compact_p = self.context.pop()
 232         self.body.append('</dl>\n')
 233
 234     def visit_field(self, node):
 235         pass
 236
 237     def depart_field(self, node):
 238         pass
 239
 240     def visit_field_name(self, node):
 241         self.body.append(self.starttag(node, 'dt', ''))
 242
 243     def depart_field_name(self, node):
 244         self.body.append('</dt>\n')
 245
 246     def visit_field_body(self, node):
 247         self.body.append(self.starttag(node, 'dd', ''))
 248
 249     def depart_field_body(self, node):
 250         self.body.append('</dd>\n')
 251
 252     # footnotes
 253     # ---------
 254     # use definition list instead of table for footnote text
 255
 256     def visit_footnote(self, node):
 257         if self.body[-1] == '<-- next footnote -->':
 258             del(self.body[-1])
 259         else:
 260             self.body.append('<dl class="footnote">')
 261         self.context.append(self.starttag(node, 'dd'))
 262         self.footnote_backrefs(node)
 263
 264     def depart_footnote(self, node):
 265         self.body.append('</dd>\n')
 266         next_siblings = node.traverse(descend=False, siblings=True,
 267                                       include_self=False)
 268         next = next_siblings and next_siblings[0]
 269         if isinstance(next, nodes.footnote):
 270             self.body.append('<-- next footnote -->')
 271         else:
 272             self.body.append('</dl>\n')
 273
 274     # footnote and citation label
 275     def label_delim(self, node, bracket, superscript):
 276         """put brackets around label?"""
 277         if isinstance(node.parent, nodes.footnote):
 278             if self.settings.footnote_references == 'brackets':
 279                 return bracket
 280             else:
 281                 return superscript
 282         else:
 283             assert isinstance(node.parent, nodes.citation)
 284             return bracket
 285
 286     def visit_label(self, node):
 287         # Context added in footnote_backrefs.
 288         suffix = '%s%s' % (self.context.pop(),
 289                            self.label_delim(node, '[', ''))
 290         self.body.append(self.starttag(node, 'dt', suffix, CLASS='label'))
 291
 292     def depart_label(self, node):
 293         delim = self.label_delim(node, ']', '')
 294         # Context added in footnote_backrefs.
 295         backref = self.context.pop()
 296         text = self.context.pop()
 297         # <dd> starttag added in visit_footnote() / visit_citation()
 298         starttag = self.context.pop()
 299         self.body.append('%s%s</dt>\n%s%s' % (delim, backref, starttag, text))
 300
 301
 302     def visit_generated(self, node):
 303         if 'sectnum' in node['classes']:
 304             # get section number (strip trailing no-break-spaces)
 305             sectnum = node.astext().rstrip(u' ')
 306             # print sectnum.encode('utf-8')
 307             self.body.append('<span class="sectnum">%s</span> '
 308                                     % self.encode(sectnum))
 309             # Content already processed:
 310             raise nodes.SkipNode
 311
 312     # def depart_generated(self, node):
 313     #     pass
 314
 315     # Image types to place in an <object> element
 316     # SVG as <img> supported since IE version 9
 317     # (but rendering problems remain (see standalonge_rst2xhtml11.xhtml test output)
 318     object_image_types = {'.swf': 'application/x-shockwave-flash'}
 319
 320     # Do not  mark the first child with 'class="first"'
 321     def visit_list_item(self, node):
 322         self.body.append(self.starttag(node, 'li', ''))
 323
 324     # inline literal
 325     def visit_literal(self, node):
 326         # special case: "code" role
 327         classes = node.get('classes', [])
 328         if 'code' in classes:
 329             # filter 'code' from class arguments
 330             node['classes'] = [cls for cls in classes if cls != 'code']
 331             self.body.append(self.starttag(node, 'code', ''))
 332             return
 333         self.body.append(
 334             self.starttag(node, 'tt', '', CLASS='literal'))
 335         text = node.astext()
 336         # remove hard line breaks (except if in a parsed-literal block)
 337         if not isinstance(node.parent, nodes.literal_block):
 338             text = text.replace('\n', ' ')
 339         # Protect text like ``--an-option`` and the regular expression
 340         # ``[+]?(\d+(\.\d*)?|\.\d+)`` from bad line wrapping
 341         for token in self.words_and_spaces.findall(text):
 342             if token.strip() and self.sollbruchstelle.search(token):
 343                 self.body.append('<span class="pre">%s</span>'
 344                                     % self.encode(token))
 345             else:
 346                 self.body.append(self.encode(token))
 347         self.body.append('</tt>')
 348         # Content already processed:
 349         raise nodes.SkipNode
 350
 351     def depart_literal(self, node):
 352         # skipped unless literal element is from "code" role:
 353         self.body.append('</code>')
 354
 355     # literal block and doctest block: no newline after <pre> tag
 356     # (leads to blank line in XHTML1.1)
 357     def visit_literal_block(self, node,):
 358         self.body.append(self.starttag(node, 'pre', suffix='',
 359                                        CLASS='literal-block'))
 360
 361     def visit_doctest_block(self, node):
 362         self.body.append(self.starttag(node, 'pre', suffix='',
 363                                        CLASS='doctest-block'))
 364
 365     # Meta tags: 'lang' attribute replaced by 'xml:lang' in XHTML 1.1
 366     def visit_meta(self, node):
 367         if node.hasattr('lang'):
 368             node['xml:lang'] = node['lang']
 369             del(node['lang'])
 370         meta = self.emptytag(node, 'meta', **node.non_default_attributes())
 371         self.add_meta(meta)
 372
 373
 374     # option-list as definition list, styled with CSS
 375     # ----------------------------------------------
 376
 377     def visit_option_list(self, node):
 378         self.body.append(
 379             self.starttag(node, 'dl', CLASS='option-list'))
 380
 381     def depart_option_list(self, node):
 382         self.body.append('</dl>\n')
 383
 384     def visit_option_list_item(self, node):
 385         pass
 386
 387     def depart_option_list_item(self, node):
 388         pass
 389
 390     def visit_option_group(self, node):
 391         self.body.append(self.starttag(node, 'dt', ''))
 392         self.body.append('<kbd>')
 393
 394     def depart_option_group(self, node):
 395         self.body.append('</kbd></dt>\n')
 396
 397     def visit_option(self, node):
 398         self.body.append(self.starttag(node, 'span', '', CLASS='option'))
 399
 400     def depart_option(self, node):
 401         self.body.append('</span>')
 402         if isinstance(node.next_node(descend=False, siblings=True),
 403                       nodes.option):
 404             self.body.append(', ')
 405
 406     def visit_description(self, node):
 407         self.body.append(self.starttag(node, 'dd', ''))
 408
 409     def depart_description(self, node):
 410         self.body.append('</dd>\n')
 411
 412     # Do not omit <p> tags
 413     # --------------------
 414     #
 415     # The HTML4CSS1 writer does this to "produce
 416     # visually compact lists (less vertical whitespace)". This writer
 417     # relies on CSS rules for"visual compactness".
 418     #
 419     # * In XHTML 1.1, e.g. a <blockquote> element may not contain
 420     #   character data, so you cannot drop the <p> tags.
 421     # * Keeping simple paragraphs in the field_body enables a CSS
 422     #   rule to start the field-body on new line if the label is too long
 423     # * it makes the code simpler.
 424     #
 425     # TODO: omit paragraph tags in simple table cells.
 426
 427     def visit_paragraph(self, node):
 428         self.body.append(self.starttag(node, 'p', ''))
 429
 430     def depart_paragraph(self, node):
 431         self.body.append('</p>')
 432         if not (isinstance(node.parent, (nodes.list_item, nodes.entry)) and
 433                 # (node is node.parent[-1])
 434                 (len(node.parent) == 1)
 435                ):
 436             self.body.append('\n')
 437
 438     # tables
 439     # ------
 440     # no hard-coded border setting in the table head::
 441
 442     def visit_table(self, node):
 443         classes = [cls.strip(u' \t\n')
 444                    for cls in self.settings.table_style.split(',')]
 445         tag = self.starttag(node, 'table', CLASS=' '.join(classes))
 446         self.body.append(tag)
 447
 448     def depart_table(self, node):
 449         self.body.append('</table>\n')
 450
 451     # no hard-coded vertical alignment in table body::
 452
 453     def visit_tbody(self, node):
 454         self.write_colspecs()
 455         self.body.append(self.context.pop()) # '</colgroup>\n' or ''
 456         self.body.append(self.starttag(node, 'tbody'))
 457
 458
 459 class SimpleListChecker(html4css1.SimpleListChecker):
 460
 461     """
 462     Raise `nodes.NodeFound` if non-simple list item is encountered.
 463
 464     Here "simple" means a list item containing nothing other than a single
 465     paragraph, a simple list, or a paragraph followed by a simple list.
 466
 467     This version also checks for simple field lists and docinfo.
 468     """
 469     # # debugging: copy of parent methods with `print` calls
 470     # def default_visit(self, node):
 471     #     print "found", node.__class__, "in", node.parent.__class__
 472     #     raise nodes.NodeFound
 473
 474     def _pass_node(self, node):
 475         pass
 476
 477     def _simple_node(self, node):
 478         # nodes that are never complex (can contain only inline nodes)
 479         raise nodes.SkipNode
 480
 481     def visit_list_item(self, node):
 482         # print "visiting list item", node.__class__
 483         children = []
 484         for child in node.children:
 485             if not isinstance(child, nodes.Invisible):
 486                 children.append(child)
 487         # print "has %s visible children" % len(children)
 488         if (children and isinstance(children[0], nodes.paragraph)
 489             and (isinstance(children[-1], nodes.bullet_list) or
 490                  isinstance(children[-1], nodes.enumerated_list) or
 491                  isinstance(children[-1], nodes.field_list))):
 492             children.pop()
 493         # print "%s children remain" % len(children)
 494         if len(children) <= 1:
 495             return
 496         else:
 497             # print "found", child.__class__, "in", node.__class__
 498             raise nodes.NodeFound
 499
 500     # Docinfo nodes:
 501     visit_docinfo = _pass_node
 502     visit_author = _simple_node
 503     visit_authors = visit_list_item
 504     visit_address = visit_list_item
 505     visit_contact = _pass_node
 506     visit_copyright = _simple_node
 507     visit_date = _simple_node
 508     visit_organization = _simple_node
 509     visit_status = _simple_node
 510     visit_version = visit_list_item
 511
 512     # Field list items
 513     visit_field_list = _pass_node
 514     visit_field = _pass_node
 515     # the field body corresponds to a list item
 516     # visit_field_body = html4css1.SimpleListChecker.visit_list_item
 517     visit_field_body = visit_list_item
 518     visit_field_name = html4css1.SimpleListChecker.invisible_visit
 519
 520     # Inline nodes
 521     visit_Text = _pass_node