docutils/writers/xhtml11/__init__.py

   1 # .. coding: utf8
   2 # :Author: Günter Milde <milde@users.berlios.de>
   3 # :Revision: $Revision$
   4 # :Date: $Date: 2005-06-28$
   5 # :Copyright: © 2005, 2009 Günter Milde.
   6 # :License: Released under the terms of the `2-Clause BSD license`_, in short:
   7 #
   8 #    Copying and distribution of this file, with or without modification,
   9 #    are permitted in any medium without royalty provided the copyright
  10 #    notice and this notice are preserved.
  11 #    This file is offered as-is, without any warranty.
  12 #
  13 # .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause
  14
  15 """
  16 Strict HyperText Markup Language document tree Writer.
  17
  18 This is a variant of Docutils' standard 'html4css1' writer.
  19
  20 GOAL:
  21  * The output conforms to the XHTML version 1.1 DTD.
  22  * It contains no hard-coded formatting information that would prevent
  23    layout design by cascading style sheets.
  24 """
  25
# Markup format used by the docstrings in this module (cf. PEP 258).
__docformat__ = 'reStructuredText'
  27
  28 import os
  29 import os.path
  30 import re
  31
  32 import docutils
  33 from docutils import frontend, nodes, utils, writers, languages
  34 from docutils.writers import html4css1
  35
  36 class Writer(html4css1.Writer):
  37
  38     supported = ('html', 'xhtml', 'xhtml1',
  39                  'html4strict', 'xhtml1strict',
  40                  'xhtml11', 'xhtml1css2')
  41     """Formats this writer supports."""
  42
  43     default_stylesheets = ['html4css1.css', 'xhtml11.css']
  44     default_stylesheet_dirs = ['.',
  45         os.path.abspath(os.path.dirname(__file__)),
  46         os.path.abspath(os.path.join(
  47             os.path.dirname(os.path.dirname(__file__)), 'html4css1'))
  48                               ]
  49
  50     config_section = 'xhtml11 writer'
  51     config_section_dependencies = ('writers', 'html4css1 writer')
  52
  53     settings_spec = frontend.filter_settings_spec(
  54         html4css1.Writer.settings_spec,
  55         'field_name_limit', 'option_limit', # removed options
  56         stylesheet_path = (
  57           'Comma separated list of stylesheet paths. '
  58           'Relative paths are expanded if a matching file is found in '
  59           'the --stylesheet-dirs. With --link-stylesheet, '
  60           'the path is rewritten relative to the output HTML file. '
  61           'Default: "%s"' % ','.join(default_stylesheets),
  62           ['--stylesheet-path'],
  63           {'metavar': '<file[,file,...]>', 'overrides': 'stylesheet',
  64            'validator': frontend.validate_comma_separated_list,
  65            'default': default_stylesheets}),
  66
  67         stylesheet_dirs = (
  68           'Comma-separated list of directories where stylesheets are found. '
  69           'Used by --stylesheet-path when expanding relative path arguments. '
  70           'Default: "%s"' % default_stylesheet_dirs,
  71           ['--stylesheet-dirs'],
  72           {'metavar': '<dir[,dir,...]>',
  73            'validator': frontend.validate_comma_separated_list,
  74            'default': default_stylesheet_dirs}),
  75         math_output = ('Math output format, one of "MathML", "HTML", '
  76             '"MathJax" or "LaTeX". Default: "MathML"',
  77             ['--math-output'],
  78             {'default': 'MathML'}))
  79
  80     def __init__(self):
  81         writers.Writer.__init__(self)
  82         self.translator_class = HTMLTranslator
  83
  84
  85 class HTMLTranslator(html4css1.HTMLTranslator):
  86     """
  87     This writer generates XHTML 1.1
  88     without formatting that interferes with a CSS stylesheet.
  89     """
  90     doctype = ('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" '
  91                '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n')
  92     doctype_mathml = (
  93         '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN" '
  94         '"http://www.w3.org/Math/DTD/mathml2/xhtml-math11-f.dtd">\n')
  95
  96     # there is no attribute "lang" in XHTML 1.1
  97     head_prefix_template = ('<html xmlns="http://www.w3.org/1999/xhtml"'
  98                             ' xml:lang="%(lang)s">\n<head>\n')
  99     lang_attribute = 'xml:lang' # changed from 'lang' in XHTML 1.0
 100
 101
 102     # Do not  mark the first child with 'class="first"' and the last
 103     # child with 'class="last"' in definitions, table cells, field
 104     # bodies, option descriptions, and list items. Use the
 105     # ``:first-child`` and ``:last-child`` selectors instad.
 106
 107     def set_first_last(self, node):
 108         pass
 109
 110     # Compact lists
 111     # ------------
 112     # Include field lists (in addition to ordered and unordered lists)
 113     # in the test if a list is "simple"  (cf. the html4css1.HTMLTranslator
 114     # docstring and the SimpleListChecker class at the end of this file).
 115
 116     def is_compactable(self, node):
 117         # print "is_compactable %s ?" % node.__class__,
 118         # explicite class arguments have precedence
 119         if 'compact' in node['classes']:
 120             # print "explicitely compact"
 121             return True
 122         if 'open' in node['classes']:
 123             # print "explicitely open"
 124             return False
 125         # check config setting:
 126         if (isinstance(node, nodes.field_list) and
 127             not self.settings.compact_field_lists):
 128             # print "`compact-field-lists` is False"
 129             return False
 130         if (isinstance(node, nodes.enumerated_list) or
 131             isinstance(node, nodes.bullet_list)
 132            ) and not self.settings.compact_lists:
 133             # print "`compact-lists` is False"
 134             return False
 135         # more special cases:
 136         if (self.compact_simple or self.topic_classes == ['contents']):
 137             # print "self.compact_simple is True"
 138             return True
 139         # check the list items:
 140         visitor = SimpleListChecker(self.document)
 141         try:
 142             node.walk(visitor)
 143         except nodes.NodeFound:
 144             # print "complex node"
 145             return False
 146         else:
 147             # print "simple list"
 148             return True
 149
 150     # citations
 151     # ---------
 152     # Use definition list instead of table for bibliographic references.
 153     # Join adjacent citation entries.
 154
 155     def visit_citation(self, node):
 156         if self.body[-1] == '<-- next citation -->':
 157             del(self.body[-1])
 158         else:
 159             self.body.append('<dl class="citation">')
 160         self.context.append(self.starttag(node, 'dd'))
 161         self.footnote_backrefs(node)
 162
 163     def depart_citation(self, node):
 164         self.body.append('</dd>\n')
 165         if isinstance(node.next_node(), nodes.citation):
 166             self.body.append('<-- next citation -->')
 167         else:
 168             self.body.append('</dl>\n')
 169
 170     # docinfo
 171     # -------
 172     # use definition list instead of table
 173
 174     def visit_docinfo(self, node):
 175         classes = 'docinfo'
 176         if (self.is_compactable(node)):
 177             classes += ' simple'
 178         self.body.append(self.starttag(node, 'dl', CLASS=classes))
 179
 180     def depart_docinfo(self, node):
 181         self.body.append('</dl>\n')
 182
 183     def visit_docinfo_item(self, node, name, meta=True):
 184         if meta:
 185             meta_tag = '<meta name="%s" content="%s" />\n' \
 186                        % (name, self.attval(node.astext()))
 187             self.add_meta(meta_tag)
 188         self.body.append('<dt class="%s">%s</dt>\n'
 189                          % (name, self.language.labels[name]))
 190         self.body.append(self.starttag(node, 'dd', '', CLASS=name))
 191
 192     def depart_docinfo_item(self):
 193         self.body.append('</dd>\n')
 194
 195
 196     # enumerated lists
 197     # ----------------
 198     # The 'start' attribute does not conform to HTML4/XHTML1 Strict
 199     # (it will resurface in HTML5)
 200
 201     def visit_enumerated_list(self, node):
 202         atts = {}
 203         if 'start' in node:
 204             atts['style'] = 'counter-reset: item %d;' % (
 205                                                 node['start'] - 1)
 206         classes = node.setdefault('classes', [])
 207         if 'enumtype' in node:
 208             classes.append(node['enumtype'])
 209         if self.is_compactable(node) and not self.compact_simple:
 210             classes.append('simple')
 211         # @@@ To do: prefix, suffix. (?)
 212         self.context.append((self.compact_simple, self.compact_p))
 213         self.compact_p = False
 214         self.body.append(self.starttag(node, 'ol', **atts))
 215
 216     # field-list
 217     # ----------
 218     # set as definition list, styled with CSS
 219
 220     def visit_field_list(self, node):
 221         # Keep simple paragraphs in the field_body to enable CSS
 222         # rule to start body on new line if the label is too long
 223         self.context.append((self.compact_field_list, self.compact_p))
 224         self.compact_field_list, self.compact_p = False, False
 225         #
 226         classes = 'field-list'
 227         if (self.is_compactable(node)):
 228             classes += ' simple'
 229         self.body.append(self.starttag(node, 'dl', CLASS=classes))
 230
 231     def depart_field_list(self, node):
 232         self.compact_field_list, self.compact_p = self.context.pop()
 233         self.body.append('</dl>\n')
 234
 235     def visit_field(self, node):
 236         pass
 237
 238     def depart_field(self, node):
 239         pass
 240
 241     def visit_field_name(self, node):
 242         self.body.append(self.starttag(node, 'dt', ''))
 243
 244     def depart_field_name(self, node):
 245         self.body.append('</dt>\n')
 246
 247     def visit_field_body(self, node):
 248         self.body.append(self.starttag(node, 'dd', ''))
 249
 250     def depart_field_body(self, node):
 251         self.body.append('</dd>\n')
 252
 253     # footnotes
 254     # ---------
 255     # use definition list instead of table for footnote text
 256
 257     def visit_footnote(self, node):
 258         if self.body[-1] == '<-- next footnote -->':
 259             del(self.body[-1])
 260         else:
 261             self.body.append('<dl class="footnote">')
 262         self.context.append(self.starttag(node, 'dd'))
 263         self.footnote_backrefs(node)
 264
 265     def depart_footnote(self, node):
 266         self.body.append('</dd>\n')
 267         next_siblings = node.traverse(descend=False, siblings=True,
 268                                       include_self=False)
 269         next = next_siblings and next_siblings[0]
 270         if isinstance(next, nodes.footnote):
 271             self.body.append('<-- next footnote -->')
 272         else:
 273             self.body.append('</dl>\n')
 274
 275     # footnote and citation label
 276     def label_delim(self, node, bracket, superscript):
 277         """put brackets around label?"""
 278         if isinstance(node.parent, nodes.footnote):
 279             if self.settings.footnote_references == 'brackets':
 280                 return bracket
 281             else:
 282                 return superscript
 283         else:
 284             assert isinstance(node.parent, nodes.citation)
 285             return bracket
 286
 287     def visit_label(self, node):
 288         # Context added in footnote_backrefs.
 289         suffix = '%s%s' % (self.context.pop(),
 290                            self.label_delim(node, '[', ''))
 291         self.body.append(self.starttag(node, 'dt', suffix, CLASS='label'))
 292
 293     def depart_label(self, node):
 294         delim = self.label_delim(node, ']', '')
 295         # Context added in footnote_backrefs.
 296         backref = self.context.pop()
 297         text = self.context.pop()
 298         # <dd> starttag added in visit_footnote() / visit_citation()
 299         starttag = self.context.pop()
 300         self.body.append('%s%s</dt>\n%s%s' % (delim, backref, starttag, text))
 301
 302
 303     def visit_generated(self, node):
 304         if 'sectnum' in node['classes']:
 305             # get section number (strip trailing no-break-spaces)
 306             sectnum = node.astext().rstrip(u' ')
 307             # print sectnum.encode('utf-8')
 308             self.body.append('<span class="sectnum">%s</span> '
 309                                     % self.encode(sectnum))
 310             # Content already processed:
 311             raise nodes.SkipNode
 312
 313     # def depart_generated(self, node):
 314     #     pass
 315
 316     # Image types to place in an <object> element
 317     # SVG as <img> supported since IE version 9
 318     # (but rendering problems remain (see standalonge_rst2xhtml11.xhtml test output)
 319     object_image_types = {'.swf': 'application/x-shockwave-flash'}
 320
 321     # Do not  mark the first child with 'class="first"'
 322     def visit_list_item(self, node):
 323         self.body.append(self.starttag(node, 'li', ''))
 324
 325     # inline literal
 326     def visit_literal(self, node):
 327         # special case: "code" role
 328         classes = node.get('classes', [])
 329         if 'code' in classes:
 330             # filter 'code' from class arguments
 331             node['classes'] = [cls for cls in classes if cls != 'code']
 332             self.body.append(self.starttag(node, 'code', ''))
 333             return
 334         self.body.append(
 335             self.starttag(node, 'tt', '', CLASS='literal'))
 336         text = node.astext()
 337         # remove hard line breaks (except if in a parsed-literal block)
 338         if not isinstance(node.parent, nodes.literal_block):
 339             text = text.replace('\n', ' ')
 340         # Protect text like ``--an-option`` and the regular expression
 341         # ``[+]?(\d+(\.\d*)?|\.\d+)`` from bad line wrapping
 342         for token in self.words_and_spaces.findall(text):
 343             if token.strip() and self.sollbruchstelle.search(token):
 344                 self.body.append('<span class="pre">%s</span>'
 345                                     % self.encode(token))
 346             else:
 347                 self.body.append(self.encode(token))
 348         self.body.append('</tt>')
 349         # Content already processed:
 350         raise nodes.SkipNode
 351
 352     def depart_literal(self, node):
 353         # skipped unless literal element is from "code" role:
 354         self.body.append('</code>')
 355
 356     # literal block and doctest block: no newline after <pre> tag
 357     # (leads to blank line in XHTML1.1)
 358     def visit_literal_block(self, node,):
 359         self.body.append(self.starttag(node, 'pre', suffix='',
 360                                        CLASS='literal-block'))
 361
 362     def visit_doctest_block(self, node):
 363         self.body.append(self.starttag(node, 'pre', suffix='',
 364                                        CLASS='doctest-block'))
 365
 366     # Meta tags: 'lang' attribute replaced by 'xml:lang' in XHTML 1.1
 367     def visit_meta(self, node):
 368         if node.hasattr('lang'):
 369             node['xml:lang'] = node['lang']
 370             del(node['lang'])
 371         meta = self.emptytag(node, 'meta', **node.non_default_attributes())
 372         self.add_meta(meta)
 373
 374
 375     # option-list as definition list, styled with CSS
 376     # ----------------------------------------------
 377
 378     def visit_option_list(self, node):
 379         self.body.append(
 380             self.starttag(node, 'dl', CLASS='option-list'))
 381
 382     def depart_option_list(self, node):
 383         self.body.append('</dl>\n')
 384
 385     def visit_option_list_item(self, node):
 386         pass
 387
 388     def depart_option_list_item(self, node):
 389         pass
 390
 391     def visit_option_group(self, node):
 392         self.body.append(self.starttag(node, 'dt', ''))
 393         self.body.append('<kbd>')
 394
 395     def depart_option_group(self, node):
 396         self.body.append('</kbd></dt>\n')
 397
 398     def visit_option(self, node):
 399         self.body.append(self.starttag(node, 'span', '', CLASS='option'))
 400
 401     def depart_option(self, node):
 402         self.body.append('</span>')
 403         if isinstance(node.next_node(descend=False, siblings=True),
 404                       nodes.option):
 405             self.body.append(', ')
 406
 407     def visit_description(self, node):
 408         self.body.append(self.starttag(node, 'dd', ''))
 409
 410     def depart_description(self, node):
 411         self.body.append('</dd>\n')
 412
 413     # Do not omit <p> tags
 414     # --------------------
 415     #
 416     # The HTML4CSS1 writer does this to "produce
 417     # visually compact lists (less vertical whitespace)". This writer
 418     # relies on CSS rules for"visual compactness".
 419     #
 420     # * In XHTML 1.1, e.g. a <blockquote> element may not contain
 421     #   character data, so you cannot drop the <p> tags.
 422     # * Keeping simple paragraphs in the field_body enables a CSS
 423     #   rule to start the field-body on new line if the label is too long
 424     # * it makes the code simpler.
 425     #
 426     # TODO: omit paragraph tags in simple table cells.
 427
 428     def visit_paragraph(self, node):
 429         self.body.append(self.starttag(node, 'p', ''))
 430
 431     def depart_paragraph(self, node):
 432         self.body.append('</p>')
 433         if not (isinstance(node.parent, (nodes.list_item, nodes.entry)) and
 434                 # (node is node.parent[-1])
 435                 (len(node.parent) == 1)
 436                ):
 437             self.body.append('\n')
 438
 439     # tables
 440     # ------
 441     # no hard-coded border setting in the table head::
 442
 443     def visit_table(self, node):
 444         classes = [cls.strip(u' \t\n')
 445                    for cls in self.settings.table_style.split(',')]
 446         tag = self.starttag(node, 'table', CLASS=' '.join(classes))
 447         self.body.append(tag)
 448
 449     def depart_table(self, node):
 450         self.body.append('</table>\n')
 451
 452     # no hard-coded vertical alignment in table body::
 453
 454     def visit_tbody(self, node):
 455         self.write_colspecs()
 456         self.body.append(self.context.pop()) # '</colgroup>\n' or ''
 457         self.body.append(self.starttag(node, 'tbody'))
 458
 459
 460 class SimpleListChecker(html4css1.SimpleListChecker):
 461
 462     """
 463     Raise `nodes.NodeFound` if non-simple list item is encountered.
 464
 465     Here "simple" means a list item containing nothing other than a single
 466     paragraph, a simple list, or a paragraph followed by a simple list.
 467
 468     This version also checks for simple field lists and docinfo.
 469     """
 470     # # debugging: copy of parent methods with `print` calls
 471     # def default_visit(self, node):
 472     #     print "found", node.__class__, "in", node.parent.__class__
 473     #     raise nodes.NodeFound
 474
 475     def _pass_node(self, node):
 476         pass
 477
 478     def _simple_node(self, node):
 479         # nodes that are never complex (can contain only inline nodes)
 480         raise nodes.SkipNode
 481
 482     def visit_list_item(self, node):
 483         # print "visiting list item", node.__class__
 484         children = []
 485         for child in node.children:
 486             if not isinstance(child, nodes.Invisible):
 487                 children.append(child)
 488         # print "has %s visible children" % len(children)
 489         if (children and isinstance(children[0], nodes.paragraph)
 490             and (isinstance(children[-1], nodes.bullet_list) or
 491                  isinstance(children[-1], nodes.enumerated_list) or
 492                  isinstance(children[-1], nodes.field_list))):
 493             children.pop()
 494         # print "%s children remain" % len(children)
 495         if len(children) <= 1:
 496             return
 497         else:
 498             # print "found", child.__class__, "in", node.__class__
 499             raise nodes.NodeFound
 500
 501     # Docinfo nodes:
 502     visit_docinfo = _pass_node
 503     visit_author = _simple_node
 504     visit_authors = visit_list_item
 505     visit_address = visit_list_item
 506     visit_contact = _pass_node
 507     visit_copyright = _simple_node
 508     visit_date = _simple_node
 509     visit_organization = _simple_node
 510     visit_status = _simple_node
 511     visit_version = visit_list_item
 512
 513     # Field list items
 514     visit_field_list = _pass_node
 515     visit_field = _pass_node
 516     # the field body corresponds to a list item
 517     # visit_field_body = html4css1.SimpleListChecker.visit_list_item
 518     visit_field_body = visit_list_item
 519     visit_field_name = html4css1.SimpleListChecker.invisible_visit
 520
 521     # Inline nodes
 522     visit_Text = _pass_node