From fa7c1de04ad5c65ed3c81976c153b8e32ff5f011 Mon Sep 17 00:00:00 2001 From: milde Date: Tue, 10 Mar 2015 14:15:17 +0000 Subject: [PATCH] New basic HTML writer: generates polyglott HTML 5 / XHTML 1.1 (transitional) git-svn-id: http://svn.code.sf.net/p/docutils/code/trunk/docutils@7815 929543f6-e4f2-0310-98a6-ba3bd3dd1d04 --- docutils/writers/html_base/__init__.py | 1721 ++++++++++++++++++++ docutils/writers/{html4css1 => html_base}/math.css | 0 docutils/writers/html_base/template.txt | 8 + tools/rst2html5.py | 35 + 4 files changed, 1764 insertions(+) create mode 100644 docutils/writers/html_base/__init__.py rename docutils/writers/{html4css1 => html_base}/math.css (100%) create mode 100644 docutils/writers/html_base/template.txt create mode 100755 tools/rst2html5.py diff --git a/docutils/writers/html_base/__init__.py b/docutils/writers/html_base/__init__.py new file mode 100644 index 000000000..19f33312c --- /dev/null +++ b/docutils/writers/html_base/__init__.py @@ -0,0 +1,1721 @@ +# .. coding: utf8 +# :Author: Günter Milde +# :Revision: $Revision$ +# :Date: $Date: 2005-06-28$ +# :Copyright: © 2005, 2009 Günter Milde. +# :License: Released under the terms of the `2-Clause BSD license`_, in short: +# +# Copying and distribution of this file, with or without modification, +# are permitted in any medium without royalty provided the copyright +# notice and this notice are preserved. +# This file is offered as-is, without any warranty. +# +# .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause + +# Use "best practice" as recommended by the W3C: +# http://www.w3.org/2009/cheatsheet/ + + +""" +Basic HyperText Markup Language document tree Writer. + +The output conforms to the `HTML 5` specification as well as +to `XHTML 1.0 transitional`. + +The cascading style sheet "html-base.css" is required for proper viewing. +""" +__docformat__ = 'reStructuredText' + +import sys +import os +import os.path +import re +import urllib +try: # check for the Python Imaging Library + import PIL.Image +except ImportError: + try: # sometimes PIL modules are put in PYTHONPATH's root + import Image + class PIL(object): pass # dummy wrapper + PIL.Image = Image + except ImportError: + PIL = None +import docutils +from docutils import frontend, nodes, utils, writers, languages, io +from docutils.utils.error_reporting import SafeString +from docutils.transforms import writer_aux +from docutils.utils.math import unichar2tex, pick_math_environment, math2html +from docutils.utils.math.latex2mathml import parse_latex_math + +class Writer(writers.Writer): + + supported = ('html', 'html5', 'xhtml') + """Formats this writer supports.""" + + default_stylesheets = ['html-base.css'] + default_stylesheet_dirs = ['.', os.path.abspath(os.path.dirname(__file__))] + + default_template = 'template.txt' + default_template_path = os.path.join( + os.path.dirname(os.path.abspath(__file__)), default_template) + + settings_spec = ( + 'HTML-Specific Options', + None, + (('Specify the template file (UTF-8 encoded). Default is "%s".' + % default_template_path, + ['--template'], + {'default': default_template_path, 'metavar': ''}), + ('Comma separated list of stylesheet URLs. ' + 'Overrides previous --stylesheet and --stylesheet-path settings.', + ['--stylesheet'], + {'metavar': '', 'overrides': 'stylesheet_path', + 'validator': frontend.validate_comma_separated_list}), + ('Comma separated list of stylesheet paths. ' + 'Relative paths are expanded if a matching file is found in ' + 'the --stylesheet-dirs. With --link-stylesheet, ' + 'the path is rewritten relative to the output HTML file. ' + 'Default: "%s"' % ','.join(default_stylesheets), + ['--stylesheet-path'], + {'metavar': '', 'overrides': 'stylesheet', + 'validator': frontend.validate_comma_separated_list, + 'default': default_stylesheets}), + ('Embed the stylesheet(s) in the output HTML file. The stylesheet ' + 'files must be accessible during processing. This is the default.', + ['--embed-stylesheet'], + {'default': 1, 'action': 'store_true', + 'validator': frontend.validate_boolean}), + ('Link to the stylesheet(s) in the output HTML file. ' + 'Default: embed stylesheets.', + ['--link-stylesheet'], + {'dest': 'embed_stylesheet', 'action': 'store_false'}), + ('Comma-separated list of directories where stylesheets are found. ' + 'Used by --stylesheet-path when expanding relative path arguments. ' + 'Default: "%s"' % default_stylesheet_dirs, + ['--stylesheet-dirs'], + {'metavar': '', + 'validator': frontend.validate_comma_separated_list, + 'default': default_stylesheet_dirs}), + ('Specify the initial header level. Default is 1 for "

". ' + 'Does not affect document title & subtitle (see --no-doc-title).', + ['--initial-header-level'], + {'choices': '1 2 3 4 5 6'.split(), 'default': '1', + 'metavar': ''}), + ('Format for footnote references: one of "superscript" or ' + '"brackets". Default is "brackets".', + ['--footnote-references'], + {'choices': ['superscript', 'brackets'], 'default': 'brackets', + 'metavar': '', + 'overrides': 'trim_footnote_reference_space'}), + ('Format for block quote attributions: one of "dash" (em-dash ' + 'prefix), "parentheses"/"parens", or "none". Default is "dash".', + ['--attribution'], + {'choices': ['dash', 'parentheses', 'parens', 'none'], + 'default': 'dash', 'metavar': ''}), + ('Remove extra vertical whitespace between items of "simple" bullet ' + 'lists and enumerated lists. Default: enabled.', + ['--compact-lists'], + {'default': True, 'action': 'store_true', + 'validator': frontend.validate_boolean}), + ('Disable compact simple bullet and enumerated lists.', + ['--no-compact-lists'], + {'dest': 'compact_lists', 'action': 'store_false'}), + ('Remove extra vertical whitespace between items of simple field ' + 'lists. Default: enabled.', + ['--compact-field-lists'], + {'default': True, 'action': 'store_true', + 'validator': frontend.validate_boolean}), + ('Disable compact simple field lists.', + ['--no-compact-field-lists'], + {'dest': 'compact_field_lists', 'action': 'store_false'}), + ('Added to standard table classes. ' + 'Defined styles: "borderless". Default: ""', + ['--table-style'], + {'default': ''}), + ('Math output format (one of "MathML", "HTML", "MathJax" ' + 'or "LaTeX") and options(s). Default: "HTML math.css"', + ['--math-output'], + {'default': 'HTML math.css'}), + ('Omit the XML declaration. Must be true for HTML5 conformance.', + ['--no-xml-declaration'], + {'dest': 'xml_declaration', 'default': False, + 'action': 'store_false', 'validator': frontend.validate_boolean}), + ('Obfuscate email addresses to confuse harvesters while still ' + 'keeping email links usable with standards-compliant browsers.', + ['--cloak-email-addresses'], + {'action': 'store_true', 'validator': frontend.validate_boolean}),)) + + settings_defaults = {'output_encoding_error_handler': 'xmlcharrefreplace'} + + config_section = 'html-base writer' + config_section_dependencies = ('writers',) + + visitor_attributes = ( + 'head_prefix', 'head', 'stylesheet', 'body_prefix', + 'body_pre_docinfo', 'docinfo', 'body', 'body_suffix', + 'title', 'subtitle', 'header', 'footer', 'meta', 'fragment', + 'html_prolog', 'html_head', 'html_title', 'html_subtitle', + 'html_body') + + def get_transforms(self): + return writers.Writer.get_transforms(self) + [writer_aux.Admonitions] + + def __init__(self): + writers.Writer.__init__(self) + self.translator_class = HTMLTranslator + + def translate(self): + self.visitor = visitor = self.translator_class(self.document) + self.document.walkabout(visitor) + for attr in self.visitor_attributes: + setattr(self, attr, getattr(visitor, attr)) + self.output = self.apply_template() + + def apply_template(self): + template_file = open(self.document.settings.template, 'rb') + template = unicode(template_file.read(), 'utf-8') + template_file.close() + subs = self.interpolation_dict() + return template % subs + + def interpolation_dict(self): + subs = {} + settings = self.document.settings + for attr in self.visitor_attributes: + subs[attr] = ''.join(getattr(self, attr)).rstrip('\n') + subs['encoding'] = settings.output_encoding + subs['version'] = docutils.__version__ + return subs + + def assemble_parts(self): + writers.Writer.assemble_parts(self) + for part in self.visitor_attributes: + self.parts[part] = ''.join(getattr(self, part)) + + +class HTMLTranslator(nodes.NodeVisitor): + + """ + This writer generates `polyglott markup`: HTML 5 that is also valid XML. + """ + + xml_declaration = '\n' + doctype = ( + '\n') + doctype_mathml = doctype + + head_prefix_template = ('\n\n') + content_type = ('\n') + content_type_mathml = ('\n') + + generator = ('\n') + + # Template for the MathJax script in the header: + mathjax_script = '\n' + # The latest version of MathJax from the distributed server: + # avaliable to the public under the `MathJax CDN Terms of Service`__ + # __http://www.mathjax.org/download/mathjax-cdn-terms-of-service/ + mathjax_url = ('http://cdn.mathjax.org/mathjax/latest/MathJax.js?' + 'config=TeX-AMS-MML_HTMLorMML') + # may be overwritten by custom URL appended to "mathjax" + + stylesheet_link = '\n' + embedded_stylesheet = '\n' + words_and_spaces = re.compile(r'\S+| +|\n') + sollbruchstelle = re.compile(r'.+\W\W.+|[-?].+', re.U) # wrap point inside word + lang_attribute = 'lang' # name changes to 'xml:lang' in XHTML 1.1 + + def __init__(self, document): + nodes.NodeVisitor.__init__(self, document) + self.settings = settings = document.settings + lcode = settings.language_code + self.language = languages.get_language(lcode, document.reporter) + self.meta = [self.generator % docutils.__version__] + self.head_prefix = [] + self.html_prolog = [] + if settings.xml_declaration: + self.head_prefix.append(self.xml_declaration + % settings.output_encoding) + # encoding not interpolated: + self.html_prolog.append(self.xml_declaration) + self.head = self.meta[:] + self.stylesheet = [self.stylesheet_call(path) + for path in utils.get_stylesheet_list(settings)] + self.body_prefix = ['\n\n'] + # document title, subtitle display + self.body_pre_docinfo = [] + # author, date, etc. + self.docinfo = [] + self.body = [] + self.fragment = [] + self.body_suffix = ['\n\n'] + self.section_level = 0 + self.initial_header_level = int(settings.initial_header_level) + + self.math_output = settings.math_output.split() + self.math_output_options = self.math_output[1:] + self.math_output = self.math_output[0].lower() + + # A heterogenous stack used in conjunction with the tree traversal. + # Make sure that the pops correspond to the pushes: + self.context = [] + + self.topic_classes = [] # TODO: replace with self_in_contents + self.colspecs = [] + self.compact_p = True + self.compact_simple = False + self.compact_field_list = False + self.in_docinfo = False + self.in_sidebar = False + self.in_footnote_list = False + self.title = [] + self.subtitle = [] + self.header = [] + self.footer = [] + self.html_head = [self.content_type] # charset not interpolated + self.html_title = [] + self.html_subtitle = [] + self.html_body = [] + self.in_document_title = 0 # len(self.body) or 0 + self.in_mailto = False + self.author_in_authors = False + self.math_header = [] + + def astext(self): + return ''.join(self.head_prefix + self.head + + self.stylesheet + self.body_prefix + + self.body_pre_docinfo + self.docinfo + + self.body + self.body_suffix) + + def encode(self, text): + """Encode special characters in `text` & return.""" + # @@@ A codec to do these and all other HTML entities would be nice. + text = unicode(text) + return text.translate({ + ord('&'): u'&', + ord('<'): u'<', + ord('"'): u'"', + ord('>'): u'>', + ord('@'): u'@', # may thwart some address harvesters + }) + + def cloak_mailto(self, uri): + """Try to hide a mailto: URL from harvesters.""" + # Encode "@" using a URL octet reference (see RFC 1738). + # Further cloaking with HTML entities will be done in the + # `attval` function. + return uri.replace('@', '%40') + + def cloak_email(self, addr): + """Try to hide the link text of a email link from harversters.""" + # Surround at-signs and periods with tags. ("@" has + # already been encoded to "@" by the `encode` method.) + addr = addr.replace('@', '@') + addr = addr.replace('.', '.') + return addr + + def attval(self, text, + whitespace=re.compile('[\n\r\t\v\f]')): + """Cleanse, HTML encode, and return attribute value text.""" + encoded = self.encode(whitespace.sub(' ', text)) + if self.in_mailto and self.settings.cloak_email_addresses: + # Cloak at-signs ("%40") and periods with HTML entities. + encoded = encoded.replace('%40', '%40') + encoded = encoded.replace('.', '.') + return encoded + + def stylesheet_call(self, path): + """Return code to reference or embed stylesheet file `path`""" + if self.settings.embed_stylesheet: + try: + content = io.FileInput(source_path=path, + encoding='utf-8').read() + self.settings.record_dependencies.add(path) + except IOError, err: + msg = u"Cannot embed stylesheet '%s': %s." % ( + path, SafeString(err.strerror)) + self.document.reporter.error(msg) + return '<--- %s --->\n' % msg + return self.embedded_stylesheet % content + # else link to style file: + if self.settings.stylesheet_path: + # adapt path relative to output (cf. config.html#stylesheet-path) + path = utils.relative_path(self.settings._destination, path) + return self.stylesheet_link % self.encode(path) + + def starttag(self, node, tagname, suffix='\n', empty=False, **attributes): + """ + Construct and return a start tag given a node (id & class attributes + are extracted), tag name, and optional attributes. + """ + tagname = tagname.lower() + prefix = [] + atts = {} + ids = [] + for (name, value) in attributes.items(): + atts[name.lower()] = value + classes = [] + languages = [] + # unify class arguments and move language specification + for cls in node.get('classes', []) + atts.pop('class', '').split() : + if cls.startswith('language-'): + languages.append(cls[9:]) + elif cls.strip() and cls not in classes: + classes.append(cls) + if languages: + # attribute name is 'lang' in XHTML 1.0 but 'xml:lang' in 1.1 + atts[self.lang_attribute] = languages[0] + if classes: + atts['class'] = ' '.join(classes) + assert 'id' not in atts + ids.extend(node.get('ids', [])) + if 'ids' in atts: + ids.extend(atts['ids']) + del atts['ids'] + if ids: + atts['id'] = ids[0] + for id in ids[1:]: + # Add empty "span" elements for additional IDs. Note + # that we cannot use empty "a" elements because there + # may be targets inside of references, but nested "a" + # elements aren't allowed in XHTML (even if they do + # not all have a "href" attribute). + if empty: + # Empty tag. Insert target right in front of element. + prefix.append('' % id) + else: + # Non-empty tag. Place the auxiliary tag + # *inside* the element, as the first child. + suffix += '' % id + attlist = atts.items() + attlist.sort() + parts = [tagname] + for name, value in attlist: + # value=None was used for boolean attributes without + # value, but this isn't supported by XHTML. + assert value is not None + if isinstance(value, list): + values = [unicode(v) for v in value] + parts.append('%s="%s"' % (name.lower(), + self.attval(' '.join(values)))) + else: + parts.append('%s="%s"' % (name.lower(), + self.attval(unicode(value)))) + if empty: + infix = ' /' + else: + infix = '' + return ''.join(prefix) + '<%s%s>' % (' '.join(parts), infix) + suffix + + def emptytag(self, node, tagname, suffix='\n', **attributes): + """Construct and return an XML-compatible empty tag.""" + return self.starttag(node, tagname, suffix, empty=True, **attributes) + + def set_class_on_child(self, node, class_, index=0): + """ + Set class `class_` on the visible child no. index of `node`. + Do nothing if node has fewer children than `index`. + """ + children = [n for n in node if not isinstance(n, nodes.Invisible)] + try: + child = children[index] + except IndexError: + return + child['classes'].append(class_) + + def set_first_last(self, node): + pass + # TODO: remove calls to this function + + def visit_Text(self, node): + text = node.astext() + encoded = self.encode(text) + if self.in_mailto and self.settings.cloak_email_addresses: + encoded = self.cloak_email(encoded) + self.body.append(encoded) + + def depart_Text(self, node): + pass + + def visit_abbreviation(self, node): + # @@@ implementation incomplete ("title" attribute) + self.body.append(self.starttag(node, 'abbr', '')) + + def depart_abbreviation(self, node): + self.body.append('') + + def visit_acronym(self, node): + # @@@ implementation incomplete ("title" attribute) + self.body.append(self.starttag(node, 'abbr', '')) + + def depart_acronym(self, node): + self.body.append('') + + def visit_address(self, node): + self.visit_docinfo_item(node, 'address', meta=False) + self.body.append(self.starttag(node, 'pre', '', CLASS='address')) + + def depart_address(self, node): + self.body.append('\n\n') + self.depart_docinfo_item() + + def visit_admonition(self, node): + node['classes'].insert(0, 'admonition') + self.body.append(self.starttag(node, 'div')) + self.set_first_last(node) + + def depart_admonition(self, node=None): + self.body.append('\n') + + attribution_formats = {'dash': ('—', ''), + 'parentheses': ('(', ')'), + 'parens': ('(', ')'), + 'none': ('', '')} + + def visit_attribution(self, node): + prefix, suffix = self.attribution_formats[self.settings.attribution] + self.context.append(suffix) + self.body.append( + self.starttag(node, 'p', prefix, CLASS='attribution')) + + def depart_attribution(self, node): + self.body.append(self.context.pop() + '

\n') + + # author, authors + # --------------- + # Use paragraphs instead of hard-coded linebreaks. + + def visit_author(self, node): + if not(isinstance(node.parent, nodes.authors)): + self.visit_docinfo_item(node, 'author') + self.body.append('

') + + def depart_author(self, node): + self.body.append('

') + if isinstance(node.parent, nodes.authors): + self.body.append('\n') + else: + self.depart_docinfo_item() + + def visit_authors(self, node): + self.visit_docinfo_item(node, 'authors', meta=False) + + def depart_authors(self, node): + self.depart_docinfo_item() + + def visit_block_quote(self, node): + self.body.append(self.starttag(node, 'blockquote')) + + def depart_block_quote(self, node): + self.body.append('\n') + + def check_simple_list(self, node): + """Check for a simple list that can be rendered compactly.""" + visitor = SimpleListChecker(self.document) + try: + node.walk(visitor) + except nodes.NodeFound: + return None + else: + return 1 + + # Compact lists + # ------------ + # Include definition lists and field lists (in addition to ordered + # and unordered lists) in the test if a list is "simple" (cf. the + # html4css1.HTMLTranslator docstring and the SimpleListChecker class at + # the end of this file). + + def is_compactable(self, node): + # print "is_compactable %s ?" % node.__class__, + # explicite class arguments have precedence + if 'compact' in node['classes']: + # print "explicitely compact" + return True + if 'open' in node['classes']: + # print "explicitely open" + return False + # check config setting: + if (isinstance(node, nodes.field_list) or + isinstance(node, nodes.definition_list) + ) and not self.settings.compact_field_lists: + # print "`compact-field-lists` is False" + return False + if (isinstance(node, nodes.enumerated_list) or + isinstance(node, nodes.bullet_list) + ) and not self.settings.compact_lists: + # print "`compact-lists` is False" + return False + # more special cases: + if (self.topic_classes == ['contents']): # TODO: self.in_contents + return True + # check the list items: + visitor = SimpleListChecker(self.document) + try: + node.walk(visitor) + except nodes.NodeFound: + # print "complex node" + return False + else: + # print "simple list" + return True + + def visit_bullet_list(self, node): + atts = {} + old_compact_simple = self.compact_simple + self.context.append((self.compact_simple, self.compact_p)) + self.compact_p = None + self.compact_simple = self.is_compactable(node) + if self.compact_simple and not old_compact_simple: + atts['class'] = 'simple' + self.body.append(self.starttag(node, 'ul', **atts)) + + def depart_bullet_list(self, node): + self.compact_simple, self.compact_p = self.context.pop() + self.body.append('\n') + + def visit_caption(self, node): + self.body.append(self.starttag(node, 'p', '', CLASS='caption')) + + def depart_caption(self, node): + self.body.append('

\n') + + # citations + # --------- + # Use definition list instead of table for bibliographic references. + # Join adjacent citation entries. + + def visit_citation(self, node): + if not self.in_footnote_list: + self.body.append('
\n') + self.in_footnote_list = True + + def depart_citation(self, node): + self.body.append('\n') + if not isinstance(node.next_node(descend=False, siblings=True), + nodes.citation): + self.body.append('
\n') + self.in_footnote_list = False + + def visit_citation_reference(self, node): + href = '#' + if 'refid' in node: + href += node['refid'] + elif 'refname' in node: + href += self.document.nameids[node['refname']] + # else: # TODO system message (or already in the transform)? + # 'Citation reference missing.' + self.body.append(self.starttag( + node, 'a', '[', CLASS='citation-reference', href=href)) + + def depart_citation_reference(self, node): + self.body.append(']') + + # classifier + # ---------- + # don't insert classifier-delimiter here (done by CSS) + + def visit_classifier(self, node): + self.body.append(self.starttag(node, 'span', '', CLASS='classifier')) + + def depart_classifier(self, node): + self.body.append('
') + + def visit_colspec(self, node): + self.colspecs.append(node) + # "stubs" list is an attribute of the tgroup element: + node.parent.stubs.append(node.attributes.get('stub')) + + def depart_colspec(self, node): + pass + + def write_colspecs(self): + width = 0 + for node in self.colspecs: + width += node['colwidth'] + for node in self.colspecs: + colwidth = int(node['colwidth'] * 100.0 / width + 0.5) + self.body.append(self.emptytag(node, 'col', + style='width: %i%%' % colwidth)) + self.colspecs = [] + + def visit_comment(self, node, + sub=re.compile('-(?=-)').sub): + """Escape double-dashes in comment text.""" + self.body.append('\n' % sub('- ', node.astext())) + # Content already processed: + raise nodes.SkipNode + + def visit_compound(self, node): + self.body.append(self.starttag(node, 'div', CLASS='compound')) + if len(node) > 1: + node[0]['classes'].append('compound-first') + node[-1]['classes'].append('compound-last') + for child in node[1:-1]: + child['classes'].append('compound-middle') + + def depart_compound(self, node): + self.body.append('\n') + + def visit_container(self, node): + self.body.append(self.starttag(node, 'div', CLASS='docutils container')) + + def depart_container(self, node): + self.body.append('\n') + + def visit_contact(self, node): + self.visit_docinfo_item(node, 'contact', meta=False) + + def depart_contact(self, node): + self.depart_docinfo_item() + + def visit_copyright(self, node): + self.visit_docinfo_item(node, 'copyright', meta=False) + + def depart_copyright(self, node): + self.depart_docinfo_item() + + def visit_date(self, node): + self.visit_docinfo_item(node, 'date', meta=False) + + def depart_date(self, node): + self.depart_docinfo_item() + + def visit_decoration(self, node): + pass + + def depart_decoration(self, node): + pass + + def visit_definition(self, node): + self.body.append('\n') + self.body.append(self.starttag(node, 'dd', '')) + self.set_first_last(node) + + def depart_definition(self, node): + self.body.append('\n') + + def visit_definition_list(self, node): + classes = node.setdefault('classes', []) + if self.is_compactable(node): + classes.append('simple') + self.body.append(self.starttag(node, 'dl')) + + def depart_definition_list(self, node): + self.body.append('\n') + + def visit_definition_list_item(self, node): + # pass class arguments, ids and names to definition term: + node.children[0]['classes'] = ( + node.get('classes', []) + node.children[0].get('classes', [])) + node.children[0]['ids'] = ( + node.get('ids', []) + node.children[0].get('ids', [])) + node.children[0]['names'] = ( + node.get('names', []) + node.children[0].get('names', [])) + + def depart_definition_list_item(self, node): + pass + + def visit_description(self, node): + self.body.append(self.starttag(node, 'dd', '')) + + def depart_description(self, node): + self.body.append('\n') + + + # docinfo + # ------- + # use definition list instead of table + + def visit_docinfo(self, node): + classes = 'docinfo' + if (self.is_compactable(node)): + classes += ' simple' + self.body.append(self.starttag(node, 'dl', CLASS=classes)) + + def depart_docinfo(self, node): + self.body.append('\n') + + def visit_docinfo_item(self, node, name, meta=True): + if meta: + meta_tag = '\n' \ + % (name, self.attval(node.astext())) + self.add_meta(meta_tag) + self.body.append('
%s
\n' + % (name, self.language.labels[name])) + self.body.append(self.starttag(node, 'dd', '', CLASS=name)) + + def depart_docinfo_item(self): + self.body.append('\n') + + # TODO: RSt-parser should treat this as code-block with class "pycon". + def visit_doctest_block(self, node): + self.body.append(self.starttag(node, 'pre', suffix='', + CLASS='code pycon doctest-block')) + + def depart_doctest_block(self, node): + self.body.append('\n\n') + + def visit_document(self, node): + self.head.append('%s\n' + % self.encode(node.get('title', ''))) + + def depart_document(self, node): + self.head_prefix.extend([self.doctype, + self.head_prefix_template % + {'lang': self.settings.language_code}]) + self.html_prolog.append(self.doctype) + self.meta.insert(0, self.content_type % self.settings.output_encoding) + self.head.insert(0, self.content_type % self.settings.output_encoding) + if self.math_header: + if self.math_output == 'mathjax': + self.head.extend(self.math_header) + else: + self.stylesheet.extend(self.math_header) + # skip content-type meta tag with interpolated charset value: + self.html_head.extend(self.head[1:]) + self.body_prefix.append(self.starttag(node, 'div', CLASS='document')) + self.body_suffix.insert(0, '\n') + self.fragment.extend(self.body) # self.fragment is the "naked" body + self.html_body.extend(self.body_prefix[1:] + self.body_pre_docinfo + + self.docinfo + self.body + + self.body_suffix[:-1]) + assert not self.context, 'len(context) = %s' % len(self.context) + + def visit_emphasis(self, node): + self.body.append(self.starttag(node, 'em', '')) + + def depart_emphasis(self, node): + self.body.append('') + + def visit_entry(self, node): + atts = {'class': []} + if isinstance(node.parent.parent, nodes.thead): + atts['class'].append('head') + if node.parent.parent.parent.stubs[node.parent.column]: + # "stubs" list is an attribute of the tgroup element + atts['class'].append('stub') + if atts['class']: + tagname = 'th' + atts['class'] = ' '.join(atts['class']) + else: + tagname = 'td' + del atts['class'] + node.parent.column += 1 + if 'morerows' in node: + atts['rowspan'] = node['morerows'] + 1 + if 'morecols' in node: + atts['colspan'] = node['morecols'] + 1 + node.parent.column += node['morecols'] + self.body.append(self.starttag(node, tagname, '', **atts)) + self.context.append('\n' % tagname.lower()) + if len(node) == 0: # empty cell + self.body.append(' ') + self.set_first_last(node) + + def depart_entry(self, node): + self.body.append(self.context.pop()) + + def visit_enumerated_list(self, node): + """ + The 'start' attribute does not conform to HTML 4.01's strict.dtd, but + CSS1 doesn't help. CSS2 isn't widely enough supported yet to be + usable. + """ + atts = {} + if 'start' in node: + atts['start'] = node['start'] + if 'enumtype' in node: + atts['class'] = node['enumtype'] + # @@@ To do: prefix, suffix. How? Change prefix/suffix to a + # single "format" attribute? Use CSS2? + old_compact_simple = self.compact_simple + self.context.append((self.compact_simple, self.compact_p)) + self.compact_p = None + self.compact_simple = self.is_compactable(node) + if self.compact_simple and not old_compact_simple: + atts['class'] = (atts.get('class', '') + ' simple').strip() + self.body.append(self.starttag(node, 'ol', **atts)) + + def depart_enumerated_list(self, node): + self.compact_simple, self.compact_p = self.context.pop() + self.body.append('\n') + + # field-list + # ---------- + # set as definition list, styled with CSS + + def visit_field_list(self, node): + # Keep simple paragraphs in the field_body to enable CSS + # rule to start body on new line if the label is too long + classes = 'field-list' + if (self.is_compactable(node)): + classes += ' simple' + self.body.append(self.starttag(node, 'dl', CLASS=classes)) + + def depart_field_list(self, node): + self.body.append('\n') + + def visit_field(self, node): + pass + + def depart_field(self, node): + pass + + def visit_field_name(self, node): + self.body.append(self.starttag(node, 'dt', '')) + + def depart_field_name(self, node): + self.body.append('\n') + + def visit_field_body(self, node): + self.body.append(self.starttag(node, 'dd', '')) + + def depart_field_body(self, node): + self.body.append('\n') + + def visit_figure(self, node): + atts = {'class': 'figure'} + if node.get('width'): + atts['style'] = 'width: %s' % node['width'] + if node.get('align'): + atts['class'] += " align-" + node['align'] + self.body.append(self.starttag(node, 'div', **atts)) + + def depart_figure(self, node): + self.body.append('\n') + + # use HTML 5

\n' + self.in_document_title = len(self.body) + else: + assert isinstance(node.parent, nodes.section) + h_level = self.section_level + self.initial_header_level - 1 + atts = {} + if (len(node.parent) >= 2 and + isinstance(node.parent[1], nodes.subtitle)): + atts['CLASS'] = 'with-subtitle' + self.body.append( + self.starttag(node, 'h%s' % h_level, '', **atts)) + atts = {} + if node.hasattr('refid'): + atts['class'] = 'toc-backref' + atts['href'] = '#' + node['refid'] + if atts: + self.body.append(self.starttag({}, 'a', '', **atts)) + close_tag = '\n' % (h_level) + else: + close_tag = '\n' % (h_level) + self.context.append(close_tag) + + def depart_title(self, node): + self.body.append(self.context.pop()) + if self.in_document_title: + self.title = self.body[self.in_document_title:-1] + self.in_document_title = 0 + self.body_pre_docinfo.extend(self.body) + self.html_title.extend(self.body) + del self.body[:] + + def visit_title_reference(self, node): + self.body.append(self.starttag(node, 'cite', '')) + + def depart_title_reference(self, node): + self.body.append('') + + def visit_topic(self, node): + self.body.append(self.starttag(node, 'div', CLASS='topic')) + self.topic_classes = node['classes'] + # TODO: replace with :: + # self.in_contents = 'contents' in node['classes'] + + def depart_topic(self, node): + self.body.append('\n') + self.topic_classes = [] + # TODO self.in_contents = False + + def visit_transition(self, node): + self.body.append(self.emptytag(node, 'hr', CLASS='docutils')) + + def depart_transition(self, node): + pass + + def visit_version(self, node): + self.visit_docinfo_item(node, 'version', meta=False) + + def depart_version(self, node): + self.depart_docinfo_item() + + def unimplemented_visit(self, node): + raise NotImplementedError('visiting unimplemented node type: %s' + % node.__class__.__name__) + + +class SimpleListChecker(nodes.GenericNodeVisitor): + + """ + Raise `nodes.NodeFound` if non-simple list item is encountered. + + Here "simple" means a list item containing nothing other than a single + paragraph, a simple list, or a paragraph followed by a simple list. + + This version also checks for simple field lists and docinfo. + """ + + def default_visit(self, node): + raise nodes.NodeFound + + def visit_list_item(self, node): + # print "visiting list item", node.__class__ + children = [child for child in node.children + if not isinstance(child, nodes.Invisible)] + # print "has %s visible children" % len(children) + if (children and isinstance(children[0], nodes.paragraph) + and (isinstance(children[-1], nodes.bullet_list) or + isinstance(children[-1], nodes.enumerated_list) or + isinstance(children[-1], nodes.field_list))): + children.pop() + # print "%s children remain" % len(children) + if len(children) <= 1: + return + else: + # print "found", child.__class__, "in", node.__class__ + raise nodes.NodeFound + + def pass_node(self, node): + pass + + def ignore_node(self, node): + # ignore nodes that are never complex (can contain only inline nodes) + raise nodes.SkipNode + + # Paragraphs and text + visit_Text = ignore_node + visit_paragraph = ignore_node + + # Lists + visit_bullet_list = pass_node + visit_enumerated_list = pass_node + visit_docinfo = pass_node + + # Docinfo nodes: + visit_author = ignore_node + visit_authors = visit_list_item + visit_address = visit_list_item + visit_contact = pass_node + visit_copyright = ignore_node + visit_date = ignore_node + visit_organization = ignore_node + visit_status = ignore_node + visit_version = visit_list_item + + # Definition list: + visit_definition_list = pass_node + visit_definition_list_item = pass_node + visit_term = ignore_node + visit_classifier = pass_node + visit_definition = visit_list_item + + # Field list: + visit_field_list = pass_node + visit_field = pass_node + # the field body corresponds to a list item + visit_field_body = visit_list_item + visit_field_name = ignore_node + + # Invisible nodes should be ignored. + visit_comment = ignore_node + visit_substitution_definition = ignore_node + visit_target = ignore_node + visit_pending = ignore_node diff --git a/docutils/writers/html4css1/math.css b/docutils/writers/html_base/math.css similarity index 100% rename from docutils/writers/html4css1/math.css rename to docutils/writers/html_base/math.css diff --git a/docutils/writers/html_base/template.txt b/docutils/writers/html_base/template.txt new file mode 100644 index 000000000..2591bce35 --- /dev/null +++ b/docutils/writers/html_base/template.txt @@ -0,0 +1,8 @@ +%(head_prefix)s +%(head)s +%(stylesheet)s +%(body_prefix)s +%(body_pre_docinfo)s +%(docinfo)s +%(body)s +%(body_suffix)s diff --git a/tools/rst2html5.py b/tools/rst2html5.py new file mode 100755 index 000000000..aa328f848 --- /dev/null +++ b/tools/rst2html5.py @@ -0,0 +1,35 @@ +#!/usr/bin/env python +# -*- coding: utf8 -*- +# :Copyright: © 2005, 2009 Günter Milde. +# :License: Released under the terms of the `2-Clause BSD license`_, in short: +# +# Copying and distribution of this file, with or without modification, +# are permitted in any medium without royalty provided the copyright +# notice and this notice are preserved. +# This file is offered as-is, without any warranty. +# +# .. _2-Clause BSD license: http://www.spdx.org/licenses/BSD-2-Clause +# +# Revision: $Revision$ +# Date: $Date$ + +""" +A minimal front end to the Docutils Publisher, producing basic HTML 5 + +The output also conforms to XHTML 1.0 transitional +(except for the doctype declaration). +""" + +try: + import locale # module missing in Jython + locale.setlocale(locale.LC_ALL, '') +except locale.Error: + pass + +from docutils.core import publish_cmdline, default_description + +description = ('Generates HTML 5 documents from standalone ' + 'reStructuredText sources ' + + default_description) + +publish_cmdline(writer_name='html-base', description=description) -- 2.11.4.GIT