Basic documentation for the xhtml11 writer and frontend.
[docutils.git] / docutils / writers / xhtml11 / __init__.py
blobe95d75e97db84c01e31407163587f0ba4e590f93
1 # .. coding: utf8
2 # :Author: Günter Milde <milde@users.berlios.de>
3 # :Revision: $Revision$
4 # :Date: $Date: 2005-06-28$
5 # :Copyright: © 2005, 2009 Günter Milde.
6 # :License: Released under the terms of the `2-Clause BSD license`_, in short:
8 # Copying and distribution of this file, with or without modification,
9 # are permitted in any medium without royalty provided the copyright
10 # notice and this notice are preserved.
11 # This file is offered as-is, without any warranty.
13 # .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause
15 """
16 Strict HyperText Markup Language document tree Writer.
18 This is a variant of Docutils' standard 'html4css1' writer.
20 GOAL:
21 * The output conforms to the XHTML version 1.1 DTD.
22 * It contains no hard-coded formatting information that would prevent
23 layout design by cascading style sheets.
24 """
26 __docformat__ = 'reStructuredText'
28 import os
29 import os.path
30 import re
32 import docutils
33 from docutils import frontend, nodes, utils, writers, languages
34 from docutils.writers import html4css1
class Writer(html4css1.Writer):
    """
    Docutils writer front end for XHTML 1.1 output.

    Reuses the settings of the 'html4css1' writer, overriding the
    stylesheet defaults and the default math output format, and installs
    the `HTMLTranslator` defined in this module as translator class.
    """

    supported = ('html', 'xhtml', 'xhtml1',
                 'html4strict', 'xhtml1strict',
                 'xhtml11', 'xhtml1css2')
    """Formats this writer supports."""

    default_stylesheets = ['html4css1.css', 'xhtml11.css']
    # Search order for relative stylesheet paths: the working directory,
    # the directory of this module, and the sibling 'html4css1' directory.
    default_stylesheet_dirs = [
        '.',
        os.path.abspath(os.path.dirname(__file__)),
        os.path.abspath(os.path.join(
            os.path.dirname(os.path.dirname(__file__)), 'html4css1')),
    ]

    config_section = 'xhtml11 writer'
    config_section_dependencies = ('writers', 'html4css1 writer')

    settings_spec = frontend.filter_settings_spec(
        html4css1.Writer.settings_spec,
        'field_name_limit', 'option_limit',  # removed options
        stylesheet_path=(
            'Comma separated list of stylesheet paths. '
            'Relative paths are expanded if a matching file is found in '
            'the --stylesheet-dirs. With --link-stylesheet, '
            'the path is rewritten relative to the output HTML file. '
            'Default: "%s"' % ','.join(default_stylesheets),
            ['--stylesheet-path'],
            {'metavar': '<file[,file,...]>', 'overrides': 'stylesheet',
             'validator': frontend.validate_comma_separated_list,
             'default': default_stylesheets}),
        stylesheet_dirs=(
            'Comma-separated list of directories where stylesheets are found. '
            'Used by --stylesheet-path when expanding relative path arguments. '
            # Show the default in the same comma separated form the
            # option accepts (not as a Python list repr).
            'Default: "%s"' % ','.join(default_stylesheet_dirs),
            ['--stylesheet-dirs'],
            {'metavar': '<dir[,dir,...]>',
             'validator': frontend.validate_comma_separated_list,
             'default': default_stylesheet_dirs}),
        math_output=(
            'Math output format, one of "MathML", "HTML", '
            '"MathJax" or "LaTeX". Default: "MathML"',
            ['--math-output'],
            {'default': 'MathML'}))

    def __init__(self):
        """Initialise the base writer and select the XHTML 1.1 translator."""
        writers.Writer.__init__(self)
        self.translator_class = HTMLTranslator
class HTMLTranslator(html4css1.HTMLTranslator):
    """
    This writer generates XHTML 1.1
    without formatting that interferes with a CSS stylesheet.

    Docinfo, field lists, option lists, footnotes and citations are
    written as definition lists (``<dl>``) instead of tables.
    """

    doctype = ('<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" '
               '"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">\n')
    doctype_mathml = (
        '<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1 plus MathML 2.0//EN" '
        '"http://www.w3.org/Math/DTD/mathml2/xhtml-math11-f.dtd">\n')

    # there is no attribute "lang" in XHTML 1.1
    head_prefix_template = ('<html xmlns="http://www.w3.org/1999/xhtml"'
                            ' xml:lang="%(lang)s">\n<head>\n')
    lang_attribute = 'xml:lang'  # changed from 'lang' in XHTML 1.0

    # Do not mark the first child with 'class="first"' and the last
    # child with 'class="last"' in definitions, table cells, field
    # bodies, option descriptions, and list items. Use the
    # ``:first-child`` and ``:last-child`` selectors instead.

    def set_first_last(self, node):
        """Do nothing: first/last children are not marked with classes."""
        pass

    # Compact lists
    # ------------
    # Include field lists (in addition to ordered and unordered lists)
    # in the test if a list is "simple" (cf. the html4css1.HTMLTranslator
    # docstring and the SimpleListChecker class at the end of this file).

    def is_compactable(self, node):
        """Return True if the list `node` should get the class "simple".

        Explicit "compact"/"open" class arguments win over the
        `compact_field_lists` / `compact_lists` settings; otherwise the
        list items are inspected with `SimpleListChecker`.
        """
        # explicit class arguments have precedence
        if 'compact' in node['classes']:
            return True
        if 'open' in node['classes']:
            return False
        # check config setting:
        if (isinstance(node, nodes.field_list)
                and not self.settings.compact_field_lists):
            return False
        if (isinstance(node, (nodes.enumerated_list, nodes.bullet_list))
                and not self.settings.compact_lists):
            return False
        # more special cases:
        if self.compact_simple or self.topic_classes == ['contents']:
            return True
        # check the list items:
        visitor = SimpleListChecker(self.document)
        try:
            node.walk(visitor)
        except nodes.NodeFound:
            # a complex node was found
            return False
        else:
            return True

    # Helpers shared by footnotes and citations
    # -----------------------------------------
    # Adjacent footnotes (or citations) share one <dl> element. When the
    # departing node is directly followed by a node of the same kind, a
    # placeholder string is appended to `self.body` instead of '</dl>';
    # the next visit removes the placeholder instead of opening a new <dl>.

    def _start_joined_list(self, marker, css_class):
        """Open a ``<dl class="css_class">`` unless `marker` ends the body."""
        # `self.body` may still be empty here: guard the index access.
        if self.body and self.body[-1] == marker:
            del self.body[-1]
        else:
            self.body.append('<dl class="%s">' % css_class)

    def _end_joined_list(self, node, node_class, marker):
        """Close the ``<dl>`` or leave `marker` if a `node_class` follows."""
        # Look at the next sibling only (do not descend into `node`).
        following = node.next_node(descend=False, siblings=True)
        if isinstance(following, node_class):
            self.body.append(marker)
        else:
            self.body.append('</dl>\n')

    # citations
    # ---------
    # Use definition list instead of table for bibliographic references.
    # Join adjacent citation entries.

    def visit_citation(self, node):
        """Start (or continue) the citation list; queue the ``<dd>`` tag."""
        self._start_joined_list('<-- next citation -->', 'citation')
        # The <dd> start tag is written in depart_label().
        self.context.append(self.starttag(node, 'dd'))
        self.footnote_backrefs(node)

    def depart_citation(self, node):
        """Close the entry; close the list unless another citation follows."""
        self.body.append('</dd>\n')
        self._end_joined_list(node, nodes.citation, '<-- next citation -->')

    # docinfo
    # -------
    # use definition list instead of table

    def visit_docinfo(self, node):
        """Open the docinfo definition list."""
        classes = 'docinfo'
        if self.is_compactable(node):
            classes += ' simple'
        self.body.append(self.starttag(node, 'dl', CLASS=classes))

    def depart_docinfo(self, node):
        """Close the docinfo definition list."""
        self.body.append('</dl>\n')

    def visit_docinfo_item(self, node, name, meta=True):
        """Write the ``<dt>`` label for docinfo field `name`, open ``<dd>``.

        If `meta` is true, a ``<meta>`` tag with the node text is added
        as well.
        """
        if meta:
            meta_tag = '<meta name="%s" content="%s" />\n' % (
                name, self.attval(node.astext()))
            self.add_meta(meta_tag)
        self.body.append('<dt class="%s">%s</dt>\n'
                         % (name, self.language.labels[name]))
        self.body.append(self.starttag(node, 'dd', '', CLASS=name))

    def depart_docinfo_item(self):
        """Close the ``<dd>`` opened in `visit_docinfo_item`."""
        self.body.append('</dd>\n')

    # enumerated lists
    # ----------------
    # The 'start' attribute does not conform to HTML4/XHTML1 Strict
    # (it will resurface in HTML5)

    def visit_enumerated_list(self, node):
        """Open an ``<ol>``; a start value becomes a CSS counter reset."""
        atts = {}
        if 'start' in node:
            atts['style'] = 'counter-reset: item %d;' % (node['start'] - 1)
        # Pass the additional classes to starttag() instead of appending
        # them to node['classes']: the document tree is left unchanged.
        extra_classes = []
        if 'enumtype' in node:
            extra_classes.append(node['enumtype'])
        if self.is_compactable(node) and not self.compact_simple:
            extra_classes.append('simple')
        if extra_classes:
            atts['CLASS'] = ' '.join(extra_classes)
        # @@@ To do: prefix, suffix. (?)
        self.context.append((self.compact_simple, self.compact_p))
        self.compact_p = False
        self.body.append(self.starttag(node, 'ol', **atts))

    # field-list
    # ----------
    # set as definition list, styled with CSS

    def visit_field_list(self, node):
        """Open the field list as ``<dl class="field-list">``."""
        # Keep simple paragraphs in the field_body to enable CSS
        # rule to start body on new line if the label is too long
        self.context.append((self.compact_field_list, self.compact_p))
        self.compact_field_list, self.compact_p = False, False

        classes = 'field-list'
        if self.is_compactable(node):
            classes += ' simple'
        self.body.append(self.starttag(node, 'dl', CLASS=classes))

    def depart_field_list(self, node):
        """Restore the compact flags saved on visit and close the list."""
        self.compact_field_list, self.compact_p = self.context.pop()
        self.body.append('</dl>\n')

    def visit_field(self, node):
        """Fields get no wrapper element of their own."""
        pass

    def depart_field(self, node):
        pass

    def visit_field_name(self, node):
        self.body.append(self.starttag(node, 'dt', ''))

    def depart_field_name(self, node):
        self.body.append('</dt>\n')

    def visit_field_body(self, node):
        self.body.append(self.starttag(node, 'dd', ''))

    def depart_field_body(self, node):
        self.body.append('</dd>\n')

    # footnotes
    # ---------
    # use definition list instead of table for footnote text

    def visit_footnote(self, node):
        """Start (or continue) the footnote list; queue the ``<dd>`` tag."""
        self._start_joined_list('<-- next footnote -->', 'footnote')
        # The <dd> start tag is written in depart_label().
        self.context.append(self.starttag(node, 'dd'))
        self.footnote_backrefs(node)

    def depart_footnote(self, node):
        """Close the entry; close the list unless another footnote follows."""
        self.body.append('</dd>\n')
        self._end_joined_list(node, nodes.footnote, '<-- next footnote -->')

    # footnote and citation label
    def label_delim(self, node, bracket, superscript):
        """put brackets around label?

        Return `bracket` for citation labels and for footnote labels if
        the `footnote_references` setting is 'brackets', else return
        `superscript`.
        """
        if isinstance(node.parent, nodes.footnote):
            if self.settings.footnote_references == 'brackets':
                return bracket
            else:
                return superscript
        else:
            assert isinstance(node.parent, nodes.citation)
            return bracket

    def visit_label(self, node):
        """Open the ``<dt class="label">`` of a footnote or citation."""
        # Context added in footnote_backrefs.
        suffix = '%s%s' % (self.context.pop(),
                           self.label_delim(node, '[', ''))
        self.body.append(self.starttag(node, 'dt', suffix, CLASS='label'))

    def depart_label(self, node):
        """Close the label's ``<dt>`` and open the ``<dd>`` for the text."""
        delim = self.label_delim(node, ']', '')
        # Context added in footnote_backrefs.
        backref = self.context.pop()
        text = self.context.pop()
        # <dd> starttag added in visit_footnote() / visit_citation()
        starttag = self.context.pop()
        self.body.append('%s%s</dt>\n%s%s' % (delim, backref, starttag, text))

    def visit_generated(self, node):
        """Wrap generated section numbers in ``<span class="sectnum">``."""
        if 'sectnum' in node['classes']:
            # get section number (strip trailing no-break-spaces);
            # the character is escaped to keep it visible in the source
            sectnum = node.astext().rstrip(u'\u00a0')
            self.body.append('<span class="sectnum">%s</span> '
                             % self.encode(sectnum))
            # Content already processed:
            raise nodes.SkipNode

    # Image types to place in an <object> element
    # SVG as <img> supported since IE version 9
    # (but rendering problems remain, see the
    # standalone_rst2xhtml11.xhtml test output)
    object_image_types = {'.swf': 'application/x-shockwave-flash'}

    # Do not mark the first child with 'class="first"'
    def visit_list_item(self, node):
        self.body.append(self.starttag(node, 'li', ''))

    # inline literal
    def visit_literal(self, node):
        """Write an inline literal as ``<tt>`` ("code" role: ``<code>``).

        For the "code" role only the start tag is written and the
        children are processed normally. Otherwise the complete element
        is written here and `nodes.SkipNode` is raised.
        """
        # special case: "code" role
        classes = node.get('classes', [])
        if 'code' in classes:
            # filter 'code' from class arguments
            node['classes'] = [cls for cls in classes if cls != 'code']
            self.body.append(self.starttag(node, 'code', ''))
            return
        self.body.append(
            self.starttag(node, 'tt', '', CLASS='literal'))
        text = node.astext()
        # remove hard line breaks (except if in a parsed-literal block)
        if not isinstance(node.parent, nodes.literal_block):
            text = text.replace('\n', ' ')
        # Protect text like ``--an-option`` and the regular expression
        # ``[+]?(\d+(\.\d*)?|\.\d+)`` from bad line wrapping
        for token in self.words_and_spaces.findall(text):
            if token.strip() and self.sollbruchstelle.search(token):
                self.body.append('<span class="pre">%s</span>'
                                 % self.encode(token))
            else:
                self.body.append(self.encode(token))
        self.body.append('</tt>')
        # Content already processed:
        raise nodes.SkipNode

    def depart_literal(self, node):
        # skipped unless literal element is from "code" role:
        self.body.append('</code>')

    # literal block and doctest block: no newline after <pre> tag
    # (leads to blank line in XHTML1.1)
    def visit_literal_block(self, node):
        self.body.append(self.starttag(node, 'pre', suffix='',
                                       CLASS='literal-block'))

    def visit_doctest_block(self, node):
        self.body.append(self.starttag(node, 'pre', suffix='',
                                       CLASS='doctest-block'))

    # Meta tags: 'lang' attribute replaced by 'xml:lang' in XHTML 1.1
    def visit_meta(self, node):
        """Add a ``<meta>`` tag, renaming a 'lang' attribute to 'xml:lang'."""
        if node.hasattr('lang'):
            node['xml:lang'] = node['lang']
            del node['lang']
        meta = self.emptytag(node, 'meta', **node.non_default_attributes())
        self.add_meta(meta)

    # option-list as definition list, styled with CSS
    # ----------------------------------------------

    def visit_option_list(self, node):
        self.body.append(
            self.starttag(node, 'dl', CLASS='option-list'))

    def depart_option_list(self, node):
        self.body.append('</dl>\n')

    def visit_option_list_item(self, node):
        """Option list items get no wrapper element of their own."""
        pass

    def depart_option_list_item(self, node):
        pass

    def visit_option_group(self, node):
        self.body.append(self.starttag(node, 'dt', ''))
        self.body.append('<kbd>')

    def depart_option_group(self, node):
        self.body.append('</kbd></dt>\n')

    def visit_option(self, node):
        self.body.append(self.starttag(node, 'span', '', CLASS='option'))

    def depart_option(self, node):
        """Close the option; separate it from a following option by ', '."""
        self.body.append('</span>')
        if isinstance(node.next_node(descend=False, siblings=True),
                      nodes.option):
            self.body.append(', ')

    def visit_description(self, node):
        self.body.append(self.starttag(node, 'dd', ''))

    def depart_description(self, node):
        self.body.append('</dd>\n')

    # Do not omit <p> tags
    # --------------------
    #
    # The HTML4CSS1 writer does this to "produce
    # visually compact lists (less vertical whitespace)". This writer
    # relies on CSS rules for "visual compactness".
    #
    # * In XHTML 1.1, e.g. a <blockquote> element may not contain
    #   character data, so you cannot drop the <p> tags.
    # * Keeping simple paragraphs in the field_body enables a CSS
    #   rule to start the field-body on new line if the label is too long
    # * it makes the code simpler.
    #
    # TODO: omit paragraph tags in simple table cells.

    def visit_paragraph(self, node):
        self.body.append(self.starttag(node, 'p', ''))

    def depart_paragraph(self, node):
        """Close the paragraph.

        No newline is written if the paragraph is the only child of a
        list item or table entry.
        """
        self.body.append('</p>')
        if not (isinstance(node.parent, (nodes.list_item, nodes.entry))
                # (node is node.parent[-1])
                and len(node.parent) == 1):
            self.body.append('\n')

    # tables
    # ------
    # no hard-coded border setting in the table head::

    def visit_table(self, node):
        """Open the table; classes come from the `table_style` setting."""
        classes = [cls.strip(u' \t\n')
                   for cls in self.settings.table_style.split(',')]
        tag = self.starttag(node, 'table', CLASS=' '.join(classes))
        self.body.append(tag)

    def depart_table(self, node):
        self.body.append('</table>\n')

    # no hard-coded vertical alignment in table body::

    def visit_tbody(self, node):
        self.write_colspecs()
        self.body.append(self.context.pop())  # '</colgroup>\n' or ''
        self.body.append(self.starttag(node, 'tbody'))
class SimpleListChecker(html4css1.SimpleListChecker):

    """
    Raise `nodes.NodeFound` if non-simple list item is encountered.

    Here "simple" means a list item containing nothing other than a single
    paragraph, a simple list, or a paragraph followed by a simple list.

    This version also checks for simple field lists and docinfo.
    """

    def _pass_node(self, node):
        """Accept `node` and continue the walk with its children."""
        pass

    def _simple_node(self, node):
        """Accept `node` without inspecting its children."""
        # nodes that are never complex (can contain only inline nodes)
        raise nodes.SkipNode

    def visit_list_item(self, node):
        """Raise `nodes.NodeFound` unless the item `node` is "simple".

        Invisible children are ignored. A trailing bullet, enumerated
        or field list is not counted if the first visible child is a
        paragraph; more than one remaining child makes the item complex.
        """
        visible = [child for child in node.children
                   if not isinstance(child, nodes.Invisible)]
        nested_lists = (nodes.bullet_list,
                        nodes.enumerated_list,
                        nodes.field_list)
        if (visible
                and isinstance(visible[0], nodes.paragraph)
                and isinstance(visible[-1], nested_lists)):
            visible.pop()
        if len(visible) > 1:
            raise nodes.NodeFound

    # Docinfo nodes:
    visit_docinfo = _pass_node
    visit_author = _simple_node
    visit_authors = visit_list_item
    visit_address = visit_list_item
    visit_contact = _pass_node
    visit_copyright = _simple_node
    visit_date = _simple_node
    visit_organization = _simple_node
    visit_status = _simple_node
    visit_version = visit_list_item

    # Field list items
    visit_field_list = _pass_node
    visit_field = _pass_node
    # the field body corresponds to a list item
    visit_field_body = visit_list_item
    visit_field_name = html4css1.SimpleListChecker.invisible_visit

    # Inline nodes
    visit_Text = _pass_node